Source code for darts.metrics.metrics

"""
Metrics
-------

Some metrics to compare time series.
"""

import inspect
from functools import wraps
from inspect import signature
from typing import Callable, List, Optional, Sequence, Tuple, Union

import numpy as np

from darts import TimeSeries
from darts.dataprocessing import dtw
from darts.logging import get_logger, raise_log
from darts.utils import _build_tqdm_iterator, _parallel_apply, n_steps_between
from darts.utils.ts_utils import SeriesType, get_series_seq_type, series2seq

logger = get_logger(__name__)
TIME_AX = 0
COMP_AX = 1

# Note: for new metrics added to this module to be able to leverage the two decorators, it is required both having
# the `actual_series` and `pred_series` parameters, and not having other ``Sequence`` as args (since these decorators
# don't "unpack" parameters different from `actual_series` and `pred_series`). In those cases, the new metric must take
# care of dealing with Sequence[TimeSeries] and multivariate TimeSeries on its own (See mase() implementation).
METRIC_OUTPUT_TYPE = Union[float, List[float], np.ndarray, List[np.ndarray]]
METRIC_TYPE = Callable[
    ...,
    METRIC_OUTPUT_TYPE,
]


[docs]def multi_ts_support(func) -> Callable[..., METRIC_OUTPUT_TYPE]: """ This decorator further adapts the metrics that took as input two (or three for scaled metrics with `insample`) univariate/multivariate ``TimeSeries`` instances, adding support for equally-sized sequences of ``TimeSeries`` instances. The decorator computes the pairwise metric for ``TimeSeries`` with the same indices, and returns a float value that is computed as a function of all the pairwise metrics using a `series_reduction` subroutine passed as argument to the metric function. If a 'Sequence[TimeSeries]' is passed as input, this decorator provides also parallelisation of the metric evaluation regarding different ``TimeSeries`` (if the `n_jobs` parameter is not set 1). """ @wraps(func) def wrapper_multi_ts_support(*args, **kwargs): actual_series = ( kwargs["actual_series"] if "actual_series" in kwargs else args[0] ) pred_series = ( kwargs["pred_series"] if "pred_series" in kwargs else args[0] if "actual_series" in kwargs else args[1] ) params = signature(func).parameters n_jobs = kwargs.pop("n_jobs", params["n_jobs"].default) if not isinstance(n_jobs, int): raise_log(ValueError("n_jobs must be an integer"), logger=logger) verbose = kwargs.pop("verbose", params["verbose"].default) if not isinstance(verbose, bool): raise_log(ValueError("verbose must be a bool"), logger=logger) # sanity check reduction functions _ = _get_reduction( kwargs=kwargs, params=params, red_name="time_reduction", axis=TIME_AX, sanity_check=True, ) _ = _get_reduction( kwargs=kwargs, params=params, red_name="component_reduction", axis=COMP_AX, sanity_check=True, ) series_reduction = _get_reduction( kwargs=kwargs, params=params, red_name="series_reduction", axis=0, sanity_check=True, ) series_seq_type = get_series_seq_type(actual_series) actual_series = series2seq(actual_series) pred_series = series2seq(pred_series) if len(actual_series) != len(pred_series): raise_log( ValueError( f"Mismatch between number of series in `actual_series` (n={len(actual_series)}) and " f"`pred_series` (n={len(pred_series)})." ), logger=logger, ) num_series_in_args = int("actual_series" not in kwargs) + int( "pred_series" not in kwargs ) input_series = (actual_series, pred_series) kwargs.pop("actual_series", 0) kwargs.pop("pred_series", 0) # handle `insample` parameter for scaled metrics if "insample" in params: insample = kwargs.get("insample") if insample is None: insample = args[ 2 - ("actual_series" in kwargs) - ("pred_series" in kwargs) ] insample = [insample] if not isinstance(insample, Sequence) else insample if len(actual_series) != len(insample): raise_log( ValueError( f"Mismatch between number of series in `actual_series` (n={len(actual_series)}) and " f"`insample` series (n={len(insample)})." ), logger=logger, ) input_series += (insample,) num_series_in_args += int("insample" not in kwargs) kwargs.pop("insample", 0) iterator = _build_tqdm_iterator( iterable=zip(*input_series), verbose=verbose, total=len(actual_series), ) # `vals` is a list of series metrics of length `len(actual_series)`. Each metric has shape # `(n time steps, n components)`; # - n times step is `1` if `time_reduction` is other than `None` # - n components: is 1 if `component_reduction` is other than `None` vals = _parallel_apply( iterator=iterator, fn=func, n_jobs=n_jobs, fn_args=args[num_series_in_args:], fn_kwargs=kwargs, ) # we flatten metrics along the time axis if n time steps == 1, # and/or along component axis if n components == 1 vals = [ val[ slice(None) if val.shape[TIME_AX] != 1 else 0, slice(None) if val.shape[COMP_AX] != 1 else 0, ] for val in vals ] # reduce metrics along series axis if series_reduction is not None: vals = kwargs["series_reduction"](vals, axis=0) elif series_seq_type == SeriesType.SINGLE: vals = vals[0] # flatten along series axis if n series == 1 return vals return wrapper_multi_ts_support
[docs]def multivariate_support(func) -> Callable[..., METRIC_OUTPUT_TYPE]: """ This decorator transforms a metric function that takes as input two univariate TimeSeries instances into a function that takes two equally-sized multivariate TimeSeries instances, computes the pairwise univariate metrics for components with the same indices, and returns a float value that is computed as a function of all the univariate metrics using a `component_reduction` subroutine passed as argument to the metric function. """ @wraps(func) def wrapper_multivariate_support(*args, **kwargs) -> METRIC_OUTPUT_TYPE: params = signature(func).parameters # we can avoid checks about args and kwargs since the input is adjusted by the previous decorator actual_series = args[0] pred_series = args[1] num_series_in_args = 2 if actual_series.width != pred_series.width: raise_log( ValueError( f"Mismatch between number of components in `actual_series` " f"(n={actual_series.width}) and `pred_series` (n={pred_series.width}." ), logger=logger, ) # handle `insample` parameters for scaled metrics input_series = (actual_series, pred_series) if "insample" in params: insample = args[2] if actual_series.width != insample.width: raise_log( ValueError( f"Mismatch between number of components in `actual_series` " f"(n={actual_series.width}) and `insample` (n={insample.width}." ), logger=logger, ) input_series += (insample,) num_series_in_args += 1 vals = func(*input_series, *args[num_series_in_args:], **kwargs) if not 1 <= len(vals.shape) <= 2: raise_log( ValueError( "Metric output must have 1 dimension for aggregated metrics (e.g. `mae()`, ...), " "or 2 dimension for time dependent metrics (e.g. `ae()`, ...)" ), logger=logger, ) elif len(vals.shape) == 1: vals = np.expand_dims(vals, TIME_AX) time_reduction = _get_reduction( kwargs=kwargs, params=params, red_name="time_reduction", axis=TIME_AX, sanity_check=False, ) if time_reduction is not None: vals = np.expand_dims(time_reduction(vals, axis=TIME_AX), axis=TIME_AX) component_reduction = _get_reduction( kwargs=kwargs, params=params, red_name="component_reduction", axis=COMP_AX, sanity_check=False, ) if component_reduction is not None: vals = np.expand_dims(component_reduction(vals, axis=COMP_AX), axis=COMP_AX) return vals return wrapper_multivariate_support
def _get_values( vals: np.ndarray, stochastic_quantile: Optional[float] = 0.5 ) -> np.ndarray: """ Returns a deterministic or probabilistic numpy array from the values of a time series. For stochastic input values, return either all sample values with (stochastic_quantile=None) or the quantile sample value with (stochastic_quantile {>=0,<=1}) """ if vals.shape[2] == 1: # deterministic out = vals[:, :, 0] else: # stochastic if stochastic_quantile is None: out = vals else: out = np.quantile(vals, stochastic_quantile, axis=2) return out def _get_values_or_raise( series_a: TimeSeries, series_b: TimeSeries, intersect: bool, stochastic_quantile: Optional[float] = 0.5, remove_nan_union: bool = False, is_insample: bool = False, ) -> Tuple[np.ndarray, np.ndarray]: """Returns the processed numpy values of two time series. Processing can be customized with arguments `intersect, stochastic_quantile, remove_nan_union`. Parameters ---------- series_a A deterministic ``TimeSeries`` instance. If `is_insample=False`, it is the `actual_series`. Otherwise, it is the `insample` series. series_b A deterministic or stochastic ``TimeSeries`` instance (the predictions `pred_series`). intersect A boolean for whether to only consider the time intersection between `series_a` and `series_b` stochastic_quantile Optionally, for stochastic predicted series, return either all sample values with (`stochastic_quantile=None`) or any deterministic quantile sample values by setting `stochastic_quantile=quantile` {>=0,<=1}. remove_nan_union By setting `remove_non_union` to True, sets all values from `series_a` and `series_b` to `np.nan` at indices where any of the two series contain a NaN value. Only effective when `is_insample=False`. is_insample Whether `series_a` corresponds to the `insample` series for scaled metrics. Raises ------ ValueError If `is_insample=False` and the two time series do not have at least a partially overlapping time index. """ if not series_a.width == series_b.width: raise_log( ValueError("The two time series must have the same number of components"), logger=logger, ) if not isinstance(intersect, bool): raise_log(ValueError("The intersect parameter must be a bool"), logger=logger) make_copy = False if not is_insample: # get the time intersection and values of the two series (corresponds to `actual_series` and `pred_series` if series_a.has_same_time_as(series_b) or not intersect: vals_a_common = series_a.all_values(copy=make_copy) vals_b_common = series_b.all_values(copy=make_copy) else: vals_a_common = series_a.slice_intersect_values(series_b, copy=make_copy) vals_b_common = series_b.slice_intersect_values(series_a, copy=make_copy) if not len(vals_a_common) == len(vals_b_common): raise_log( ValueError( "The two time series must have at least a partially overlapping time index." ), logger=logger, ) vals_b_det = _get_values(vals_b_common, stochastic_quantile=stochastic_quantile) else: # for `insample` series we extract only values up until before start of `pred_series` # find how many steps `insample` overlaps into `series_b` end = ( n_steps_between( end=series_b.start_time(), start=series_a.end_time(), freq=series_a.freq ) - 1 ) if end > 0 or abs(end) >= len(series_a): raise_log( ValueError( "The `insample` series must start before the `pred_series` and " "extend at least until one time step before the start of `pred_series`." ), logger=logger, ) end = end or None vals_a_common = series_a.all_values(copy=make_copy)[:end] vals_b_det = None vals_a_det = _get_values(vals_a_common, stochastic_quantile=stochastic_quantile) if not remove_nan_union or is_insample: return vals_a_det, vals_b_det b_is_deterministic = bool(len(vals_b_det.shape) == 2) if b_is_deterministic: isnan_mask = np.logical_or(np.isnan(vals_a_det), np.isnan(vals_b_det)) isnan_mask_pred = isnan_mask else: isnan_mask = np.logical_or( np.isnan(vals_a_det), np.isnan(vals_b_det).any(axis=2) ) isnan_mask_pred = np.repeat( np.expand_dims(isnan_mask, axis=-1), vals_b_det.shape[2], axis=2 ) return np.where(isnan_mask, np.nan, vals_a_det), np.where( isnan_mask_pred, np.nan, vals_b_det ) def _get_wrapped_metric( func: Callable[..., METRIC_OUTPUT_TYPE] ) -> Callable[..., METRIC_OUTPUT_TYPE]: """Returns the inner metric function `func` which bypasses the decorators `multi_ts_support` and `multivariate_support`. It significantly decreases process time compared to calling `func` directly. Only use this to compute a pre-defined metric within the scope of another metric. """ return func.__wrapped__.__wrapped__ def _get_reduction( kwargs, params, red_name, axis, sanity_check: bool = True ) -> Optional[Callable[..., np.ndarray]]: """Returns the reduction function either from user kwargs or metric default. Optionally performs sanity checks for presence of `axis` parameter, and correct output type and reduced shape.""" if red_name not in params: return None red_fn = kwargs[red_name] if red_name in kwargs else params[red_name].default if not sanity_check: return red_fn if red_fn is not None: red_params = inspect.signature(red_fn).parameters if "axis" not in red_params: raise_log( ValueError( f"Invalid `{red_name}` function: Must have a parameter called `axis`." ), logger=logger, ) # verify `red_fn` reduces to array with correct shape shape_in = (2, 1) if axis == 0 else (1, 2) out = red_fn(np.zeros(shape_in), axis=axis) if not isinstance(out, np.ndarray): raise_log( ValueError( f"Invalid `{red_name}` function output type: Expected type " f"`np.ndarray`, received type=`{type(out)}`." ), logger=logger, ) shape_invalid = out.shape != (1,) if shape_invalid: raise_log( ValueError( f"Invalid `{red_name}` function output shape: The function must reduce an input " f"`np.ndarray` of shape (t, c) to a `np.ndarray` of shape `(c,)`. " f"However, the function reduced a test array of shape `{shape_in}` to " f"`{out.shape}`." ), logger=logger, ) return red_fn def _get_error_scale( insample: TimeSeries, pred_series: TimeSeries, m: int, metric: str, ): """Computes the error scale based on a naive seasonal forecasts on `insample` values with seasonality `m`.""" if not isinstance(m, int): raise_log( ValueError(f"Seasonality `m` must be of type `int`, recevied `m={m}`"), logger=logger, ) # `x_t` are the true `y` values before the start of `y_pred` x_t, _ = _get_values_or_raise( insample, pred_series, intersect=False, remove_nan_union=False, is_insample=True ) diff = x_t[m:] - x_t[:-m] if metric == "mae": scale = np.nanmean(np.abs(diff), axis=TIME_AX) elif metric == "mse": scale = np.nanmean(np.power(diff, 2), axis=TIME_AX) elif metric == "rmse": scale = np.sqrt(np.nanmean(np.power(diff, 2), axis=TIME_AX)) else: raise_log( ValueError( f"unknown `metric={metric}`. Must be one of ('mae', 'mse', 'rmse')." ), logger=logger, ) if np.isclose(scale, 0.0).any(): raise_log(ValueError("cannot use MASE with periodical signals"), logger=logger) return scale
[docs]@multi_ts_support @multivariate_support def err( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Error (ERR). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column and time step :math:`t` as: .. math:: y_t - \\hat{y}_t If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the time axis. If `None`, will return a metric per time step. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - a sequence (list) of uni/multivariate series with `series_reduction`, `component_reduction` and `time_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n time steps, n components) without time and component reductions. For: - single multivariate series and at least `component_reduction=None`. - single uni/multivariate series and at least `time_reduction=None`. - a sequence of uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, remove_nan_union=False ) return y_true - y_pred
[docs]@multi_ts_support @multivariate_support def merr( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Mean Error (MERR). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column as: .. math:: \\frac{1}{T}\\sum_{t=1}^T{(y_t - \\hat{y}_t)} If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ return np.nanmean( _get_wrapped_metric(err)( actual_series, pred_series, intersect, ), axis=TIME_AX, )
[docs]@multi_ts_support @multivariate_support def ae( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Absolute Error (AE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column and time step :math:`t` as: .. math:: |y_t - \\hat{y}_t| If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the time axis. If `None`, will return a metric per time step. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. series_reduction Optionally, a function taking as input a ``np.ndarray`` and returning either a scalar value or a ``np.ndarray``. This function is used to aggregate the metrics in case the metric is evaluated on multiple series (e.g., on a ``Sequence[TimeSeries]``). By default, returns the metric for each series. Example: ``series_reduction=np.nanmean``, will return the average over all series metrics. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - a sequence (list) of uni/multivariate series with `series_reduction`, `component_reduction` and `time_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n time steps, n components) without time and component reductions. For: - single multivariate series and at least `component_reduction=None`. - single uni/multivariate series and at least `time_reduction=None`. - a sequence of uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, remove_nan_union=False ) return np.abs(y_true - y_pred)
[docs]@multi_ts_support @multivariate_support def mae( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Mean Absolute Error (MAE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column as: .. math:: \\frac{1}{T}\\sum_{t=1}^T{|y_t - \\hat{y}_t|} If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. series_reduction Optionally, a function taking as input a ``np.ndarray`` and returning either a scalar value or a ``np.ndarray``. This function is used to aggregate the metrics in case the metric is evaluated on multiple series (e.g., on a ``Sequence[TimeSeries]``). By default, returns the metric for each series. Example: ``series_reduction=np.nanmean``, will return the average over all series metrics. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ return np.nanmean( _get_wrapped_metric(ae)( actual_series, pred_series, intersect, ), axis=TIME_AX, )
[docs]@multi_ts_support @multivariate_support def ase( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], insample: Union[TimeSeries, Sequence[TimeSeries]], m: int = 1, intersect: bool = True, *, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Absolute Scaled Error (ASE) (see [1]_ for more information on scaled forecasting errors). It is the Absolute Error (AE) scaled by the Mean AE (MAE) of the naive m-seasonal forecast. For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column and time step :math:`t` as: .. math:: \\frac{AE(y_{t_p+1:t_p+T}, \\hat{y}_{t_p+1:t_p+T})}{E_m}, where :math:`t_p` is the prediction time (one step before the first forecasted point), :math:`AE` is the Absolute Error (:func:`~darts.metrics.metrics.ae`), and :math:`E_m` is the Mean AE (MAE) of the naive m-seasonal forecast on the `insample` series :math:`y_{0:t_p}` (the true series ending at :math:`t_p`): .. math:: E_m = MAE(y_{m:t_p}, y_{0:t_p - m}). If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. insample The training series used to forecast `pred_series` . This series serves to compute the scale of the error obtained by a naive forecaster on the training data. m The seasonality to use for differencing to compute the error scale :math:`E_m` (as described in the metric description). :math:`m=1` corresponds to a non-seasonal :math:`E_m` (e.g. naive repetition of the last observed value), whereas :math:`m>1` corresponds to a seasonal :math:`E_m`. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the time axis. If `None`, will return a metric per time step. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If the `insample` series is periodic ( :math:`y_t = y_{t-m}` ) or any series in `insample` does not end one time step before the start of the corresponding forecast in `pred_series`. Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - a sequence (list) of uni/multivariate series with `series_reduction`, `component_reduction` and `time_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n time steps, n components) without time and component reductions. For: - single multivariate series and at least `component_reduction=None`. - single uni/multivariate series and at least `time_reduction=None`. - a sequence of uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. References ---------- .. [1] https://www.pmorgan.com.au/tutorials/mae%2C-mape%2C-mase-and-the-scaled-rmse/ """ error_scale = _get_error_scale(insample, pred_series, m=m, metric="mae") errors = _get_wrapped_metric(ae)( actual_series, pred_series, intersect, ) return errors / error_scale
[docs]@multi_ts_support @multivariate_support def mase( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], insample: Union[TimeSeries, Sequence[TimeSeries]], m: int = 1, intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Mean Absolute Scaled Error (MASE) (see [1]_ for more information on scaled forecasting errors). It is the Mean Absolute Error (MAE) scaled by the MAE of the naive m-seasonal forecast. For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column as: .. math:: \\frac{MAE(y_{t_p+1:t_p+T}, \\hat{y}_{t_p+1:t_p+T})}{E_m}, where :math:`t_p` is the prediction time (one step before the first forecasted point), :math:`MAE` is the Mean Absolute Error (:func:`~darts.metrics.metrics.mae`), and :math:`E_m` is the MAE of the naive m-seasonal forecast on the `insample` series :math:`y_{0:t_p}` (the true series ending at :math:`t_p`): .. math:: E_m = MAE(y_{m:t_p}, y_{0:t_p - m}). If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. insample The training series used to forecast `pred_series` . This series serves to compute the scale of the error obtained by a naive forecaster on the training data. m The seasonality to use for differencing to compute the error scale :math:`E_m` (as described in the metric description). :math:`m=1` corresponds to a non-seasonal :math:`E_m` (e.g. naive repetition of the last observed value), whereas :math:`m>1` corresponds to a seasonal :math:`E_m`. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If the `insample` series is periodic ( :math:`y_t = y_{t-m}` ) or any series in `insample` does not end one time step before the start of the corresponding forecast in `pred_series`. Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. References ---------- .. [1] https://www.pmorgan.com.au/tutorials/mae%2C-mape%2C-mase-and-the-scaled-rmse/ """ return np.nanmean( _get_wrapped_metric(ase)( actual_series, pred_series, insample, m=m, intersect=intersect, ), axis=TIME_AX, )
[docs]@multi_ts_support @multivariate_support def se( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Squared Error (SE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column and time step :math:`t` as: .. math:: (y_t - \\hat{y}_t)^2. If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the time axis. If `None`, will return a metric per time step. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - a sequence (list) of uni/multivariate series with `series_reduction`, `component_reduction` and `time_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n time steps, n components) without time and component reductions. For: - single multivariate series and at least `component_reduction=None`. - single uni/multivariate series and at least `time_reduction=None`. - a sequence of uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, remove_nan_union=False ) return (y_true - y_pred) ** 2
[docs]@multi_ts_support @multivariate_support def mse( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Mean Squared Error (MSE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column as: .. math:: \\frac{1}{T}\\sum_{t=1}^T{(y_t - \\hat{y}_t)^2}. If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ return np.nanmean( _get_wrapped_metric(se)( actual_series, pred_series, intersect, ), axis=TIME_AX, )
[docs]@multi_ts_support @multivariate_support def sse( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], insample: Union[TimeSeries, Sequence[TimeSeries]], m: int = 1, intersect: bool = True, *, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Squared Scaled Error (SSE) (see [1]_ for more information on scaled forecasting errors). It is the Squared Error (SE) scaled by the Mean SE (MSE) of the naive m-seasonal forecast. For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column and time step :math:`t` as: .. math:: \\frac{SE(y_{t_p+1:t_p+T}, \\hat{y}_{t_p+1:t_p+T})}{E_m}, where :math:`t_p` is the prediction time (one step before the first forecasted point), :math:`SE` is the Squared Error (:func:`~darts.metrics.metrics.se`), and :math:`E_m` is the Mean SE (MSE) of the naive m-seasonal forecast on the `insample` series :math:`y_{0:t_p}` (the true series ending at :math:`t_p`): .. math:: E_m = MSE(y_{m:t_p}, y_{0:t_p - m}). If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. insample The training series used to forecast `pred_series` . This series serves to compute the scale of the error obtained by a naive forecaster on the training data. m The seasonality to use for differencing to compute the error scale :math:`E_m` (as described in the metric description). :math:`m=1` corresponds to a non-seasonal :math:`E_m` (e.g. naive repetition of the last observed value), whereas :math:`m>1` corresponds to a seasonal :math:`E_m`. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the time axis. If `None`, will return a metric per time step. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If the `insample` series is periodic ( :math:`y_t = y_{t-m}` ) or any series in `insample` does not end one time step before the start of the corresponding forecast in `pred_series`. Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - a sequence (list) of uni/multivariate series with `series_reduction`, `component_reduction` and `time_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n time steps, n components) without time and component reductions. For: - single multivariate series and at least `component_reduction=None`. - single uni/multivariate series and at least `time_reduction=None`. - a sequence of uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. References ---------- .. [1] https://www.pmorgan.com.au/tutorials/mae%2C-mape%2C-mase-and-the-scaled-rmse/ """ error_scale = _get_error_scale(insample, pred_series, m=m, metric="mse") errors = _get_wrapped_metric(se)( actual_series, pred_series, intersect, ) return errors / error_scale
[docs]@multi_ts_support @multivariate_support def msse( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], insample: Union[TimeSeries, Sequence[TimeSeries]], m: int = 1, intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Mean Squared Scaled Error (MSSE) (see [1]_ for more information on scaled forecasting errors). It is the Mean Squared Error (MSE) scaled by the MSE of the naive m-seasonal forecast. For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column as: .. math:: \\frac{MSE(y_{t_p+1:t_p+T}, \\hat{y}_{t_p+1:t_p+T})}{E_m}, where :math:`t_p` is the prediction time (one step before the first forecasted point), :math:`MSE` is the Mean Squared Error (:func:`~darts.metrics.metrics.mse`), and :math:`E_m` is the MSE of the naive m-seasonal forecast on the `insample` series :math:`y_{0:t_p}` (the true series ending at :math:`t_p`): .. math:: E_m = MSE(y_{m:t_p}, y_{0:t_p - m}). If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. insample The training series used to forecast `pred_series` . This series serves to compute the scale of the error obtained by a naive forecaster on the training data. m The seasonality to use for differencing to compute the error scale :math:`E_m` (as described in the metric description). :math:`m=1` corresponds to a non-seasonal :math:`E_m` (e.g. naive repetition of the last observed value), whereas :math:`m>1` corresponds to a seasonal :math:`E_m`. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If the `insample` series is periodic ( :math:`y_t = y_{t-m}` ) or any series in `insample` does not end one time step before the start of the corresponding forecast in `pred_series`. Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. References ---------- .. [1] https://www.pmorgan.com.au/tutorials/mae%2C-mape%2C-mase-and-the-scaled-rmse/ """ return np.nanmean( _get_wrapped_metric(sse)( actual_series, pred_series, insample, m=m, intersect=intersect, ), axis=TIME_AX, )
[docs]@multi_ts_support @multivariate_support def rmse( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Root Mean Squared Error (RMSE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column as: .. math:: \\sqrt{\\frac{1}{T}\\sum_{t=1}^T{(y_t - \\hat{y}_t)^2}} If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ return np.sqrt( _get_wrapped_metric(mse)( actual_series, pred_series, intersect, ) )
[docs]@multi_ts_support @multivariate_support def rmsse( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], insample: Union[TimeSeries, Sequence[TimeSeries]], m: int = 1, intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Root Mean Squared Scaled Error (RMSSE) (see [1]_ for more information on scaled forecasting errors). It is the Root Mean Squared Error (RMSE) scaled by the RMSE of the naive m-seasonal forecast. For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column as: .. math:: \\frac{RMSE(y_{t_p+1:t_p+T}, \\hat{y}_{t_p+1:t_p+T})}{E_m}, where :math:`t_p` is the prediction time (one step before the first forecasted point), :math:`RMSE` is the Root Mean Squared Error (:func:`~darts.metrics.metrics.rmse`), and :math:`E_m` is the RMSE of the naive m-seasonal forecast on the `insample` series :math:`y_{0:t_p}` (the true series ending at :math:`t_p`): .. math:: E_m = RMSE(y_{m:t_p}, y_{0:t_p - m}). If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. insample The training series used to forecast `pred_series` . This series serves to compute the scale of the error obtained by a naive forecaster on the training data. m The seasonality to use for differencing to compute the error scale :math:`E_m` (as described in the metric description). :math:`m=1` corresponds to a non-seasonal :math:`E_m` (e.g. naive repetition of the last observed value), whereas :math:`m>1` corresponds to a seasonal :math:`E_m`. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If the `insample` series is periodic ( :math:`y_t = y_{t-m}` ) or any series in `insample` does not end one time step before the start of the corresponding forecast in `pred_series`. Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. References ---------- .. [1] https://www.pmorgan.com.au/tutorials/mae%2C-mape%2C-mase-and-the-scaled-rmse/ """ error_scale = _get_error_scale(insample, pred_series, m=m, metric="rmse") errors = _get_wrapped_metric(rmse)( actual_series, pred_series, intersect, ) return errors / error_scale
[docs]@multi_ts_support @multivariate_support def sle( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Squared Log Error (SLE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column and time step :math:`t` as: .. math:: \\left(\\log{(y_t + 1)} - \\log{(\\hat{y} + 1)}\\right)^2 using the natural logarithm. If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the time axis. If `None`, will return a metric per time step. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - a sequence (list) of uni/multivariate series with `series_reduction`, `component_reduction` and `time_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n time steps, n components) without time and component reductions. For: - single multivariate series and at least `component_reduction=None`. - single uni/multivariate series and at least `time_reduction=None`. - a sequence of uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, remove_nan_union=False ) y_true, y_pred = np.log(y_true + 1), np.log(y_pred + 1) return (y_true - y_pred) ** 2
[docs]@multi_ts_support @multivariate_support def rmsle( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Root Mean Squared Log Error (RMSLE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column as: .. math:: \\sqrt{\\frac{1}{T}\\sum_{t=1}^T{\\left(\\log{(y_t + 1)} - \\log{(\\hat{y}_t + 1)}\\right)^2}} using the natural logarithm. If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ return np.sqrt( np.nanmean( _get_wrapped_metric(sle)( actual_series, pred_series, intersect, ), axis=TIME_AX, ) )
[docs]@multi_ts_support @multivariate_support def ape( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Absolute Percentage Error (APE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed as a percentage value per component/column and time step :math:`t` with: .. math:: 100 \\cdot \\left| \\frac{y_t - \\hat{y}_t}{y_t} \\right| Note that it will raise a `ValueError` if :math:`y_t = 0` for some :math:`t`. Consider using the Absolute Scaled Error (:func:`~darts.metrics.metrics.ase`) in these cases. If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the time axis. If `None`, will return a metric per time step. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If `actual_series` contains some zeros. Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - a sequence (list) of uni/multivariate series with `series_reduction`, `component_reduction` and `time_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n time steps, n components) without time and component reductions. For: - single multivariate series and at least `component_reduction=None`. - single uni/multivariate series and at least `time_reduction=None`. - a sequence of uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, remove_nan_union=False ) if not (y_true != 0).all(): raise_log( ValueError( "`actual_series` must be strictly positive to compute the MAPE." ), logger=logger, ) return 100.0 * np.abs((y_true - y_pred) / y_true)
[docs]@multi_ts_support @multivariate_support def mape( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Mean Absolute Percentage Error (MAPE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed as a percentage value per component/column with: .. math:: 100 \\cdot \\frac{1}{T} \\sum_{t=1}^{T}{\\left| \\frac{y_t - \\hat{y}_t}{y_t} \\right|} Note that it will raise a `ValueError` if :math:`y_t = 0` for some :math:`t`. Consider using the Mean Absolute Scaled Error (:func:`~darts.metrics.metrics.mase`) in these cases. If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If `actual_series` contains some zeros. Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ return np.nanmean( _get_wrapped_metric(ape)( actual_series, pred_series, intersect, ), axis=TIME_AX, )
[docs]@multi_ts_support @multivariate_support def sape( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """symmetric Absolute Percentage Error (sAPE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed as a percentage value per component/column and time step :math:`t` with: .. math:: 200 \\cdot \\frac{\\left| y_t - \\hat{y}_t \\right|}{\\left| y_t \\right| + \\left| \\hat{y}_t \\right|} Note that it will raise a `ValueError` if :math:`\\left| y_t \\right| + \\left| \\hat{y}_t \\right| = 0` for some :math:`t`. Consider using the Absolute Scaled Error (:func:`~darts.metrics.metrics.ase`) in these cases. If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the time axis. If `None`, will return a metric per time step. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If `actual_series` and `pred_series` contain some zeros at the same time index. Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - a sequence (list) of uni/multivariate series with `series_reduction`, `component_reduction` and `time_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n time steps, n components) without time and component reductions. For: - single multivariate series and at least `component_reduction=None`. - single uni/multivariate series and at least `time_reduction=None`. - a sequence of uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, remove_nan_union=True ) if not np.logical_or(y_true != 0, y_pred != 0).all(): raise_log( ValueError( "`actual_series` must be strictly positive to compute the sMAPE." ), logger=logger, ) return 200.0 * np.abs(y_true - y_pred) / (np.abs(y_true) + np.abs(y_pred))
[docs]@multi_ts_support @multivariate_support def smape( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """symmetric Mean Absolute Percentage Error (sMAPE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed as a percentage value per component/column with: .. math:: 200 \\cdot \\frac{1}{T} \\sum_{t=1}^{T}{\\frac{\\left| y_t - \\hat{y}_t \\right|}{\\left| y_t \\right| + \\left| \\hat{y}_t \\right|} } Note that it will raise a `ValueError` if :math:`\\left| y_t \\right| + \\left| \\hat{y}_t \\right| = 0` for some :math:`t`. Consider using the Mean Absolute Scaled Error (:func:`~darts.metrics.metrics.mase`) in these cases. If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If the `actual_series` and the `pred_series` contain some zeros at the same time index. Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ return np.nanmean( _get_wrapped_metric(sape)( actual_series, pred_series, intersect, ), axis=TIME_AX, )
[docs]@multi_ts_support @multivariate_support def ope( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Overall Percentage Error (OPE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed as a percentage value per component/column with: .. math:: 100 \\cdot \\left| \\frac{\\sum_{t=1}^{T}{y_t} - \\sum_{t=1}^{T}{\\hat{y}_t}}{\\sum_{t=1}^{T}{y_t}} \\right|. If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If :math:`\\sum_{t=1}^{T}{y_t} = 0`. Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, remove_nan_union=True ) y_true_sum, y_pred_sum = np.nansum(y_true, axis=TIME_AX), np.nansum( y_pred, axis=TIME_AX ) if not (y_true_sum > 0).all(): raise_log( ValueError( "The series of actual value cannot sum to zero when computing OPE." ), logger=logger, ) return np.abs((y_true_sum - y_pred_sum) / y_true_sum) * 100.0
[docs]@multi_ts_support @multivariate_support def arre( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Absolute Ranged Relative Error (ARRE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed as a percentage value per component/column and time step :math:`t` with: .. math:: 100 \\cdot \\left| \\frac{y_t - \\hat{y}_t} {\\max_t{y_t} - \\min_t{y_t}} \\right| If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the time axis. If `None`, will return a metric per time step. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If :math:`\\max_t{y_t} = \\min_t{y_t}`. Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - a sequence (list) of uni/multivariate series with `series_reduction`, `component_reduction` and `time_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n time steps, n components) without time and component reductions. For: - single multivariate series and at least `component_reduction=None`. - single uni/multivariate series and at least `time_reduction=None`. - a sequence of uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, remove_nan_union=True ) y_max, y_min = np.nanmax(y_true, axis=TIME_AX), np.nanmin(y_true, axis=TIME_AX) if not (y_max > y_min).all(): raise_log( ValueError( "The difference between the max and min values must " "be strictly positive to compute the MARRE." ), logger=logger, ) true_range = y_max - y_min return 100.0 * np.abs((y_true - y_pred) / true_range)
[docs]@multi_ts_support @multivariate_support def marre( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Mean Absolute Ranged Relative Error (MARRE). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed as a percentage value per component/column with: .. math:: 100 \\cdot \\frac{1}{T} \\sum_{t=1}^{T} {\\left| \\frac{y_t - \\hat{y}_t} {\\max_t{y_t} - \\min_t{y_t}} \\right|} If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Raises ------ ValueError If :math:`\\max_t{y_t} = \\min_t{y_t}`. float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ return np.nanmean( _get_wrapped_metric(arre)( actual_series, pred_series, intersect, ), axis=TIME_AX, )
[docs]@multi_ts_support @multivariate_support def r2_score( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Coefficient of Determination :math:`R^2` (see [1]_ for more details). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column as: .. math:: 1 - \\frac{\\sum_{t=1}^T{(y_t - \\hat{y}_t)^2}}{\\sum_{t=1}^T{(y_t - \\bar{y})^2}}, where :math:`\\bar{y}` is the mean of :math:`y` over all time steps. This metric is not symmetric. If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. References ---------- .. [1] https://en.wikipedia.org/wiki/Coefficient_of_determination """ y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, remove_nan_union=True ) ss_errors = np.nansum((y_true - y_pred) ** 2, axis=TIME_AX) y_hat = np.nanmean(y_true, axis=TIME_AX) ss_tot = np.nansum((y_true - y_hat) ** 2, axis=TIME_AX) return 1 - ss_errors / ss_tot
[docs]@multi_ts_support @multivariate_support def coefficient_of_variation( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Coefficient of Variation (percentage). For the true series :math:`y` and predicted series :math:`\\hat{y}` of length :math:`T`, it is computed per component/column as a percentage value with: .. math:: 100 \\cdot \\text{RMSE}(y_t, \\hat{y}_t) / \\bar{y}, where :math:`RMSE` is the Root Mean Squared Error (:func:`~darts.metrics.metrics.rmse`), and :math:`\\bar{y}` is the average of :math:`y` over all time steps. If any of the series is stochastic (containing several samples), :math:`\\hat{y}_t` is the median over all samples for time step :math:`t`. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, remove_nan_union=True ) # not calling rmse as y_true and y_pred are np.ndarray return ( 100 * np.sqrt(np.nanmean((y_true - y_pred) ** 2, axis=TIME_AX)) / np.nanmean(y_true, axis=TIME_AX) )
# Dynamic Time Warping
[docs]@multi_ts_support @multivariate_support def dtw_metric( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], metric: Callable[ [ Union[TimeSeries, Sequence[TimeSeries]], Union[TimeSeries, Sequence[TimeSeries]], ], METRIC_OUTPUT_TYPE, ] = mae, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, **kwargs, ) -> METRIC_OUTPUT_TYPE: """ Applies Dynamic Time Warping to `actual_series` and `pred_series` before passing it into the metric. Enables comparison between series of different lengths, phases and time indices. Defaults to using :func:`~darts.metrics.metrics.mae` as a metric. See :func:`~darts.dataprocessing.dtw.dtw.dtw` for more supported parameters. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. metric The selected metric with signature '[[TimeSeries, TimeSeries], float]' to use. Default: `mae`. component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ alignment = dtw.dtw(actual_series, pred_series, **kwargs) warped_actual_series, warped_pred_series = alignment.warped() return _get_wrapped_metric(metric)( warped_actual_series, warped_pred_series, )
[docs]@multi_ts_support @multivariate_support def qr( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], q: float = 0.5, intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Quantile Risk (QR) QR is a metric that quantifies the accuracy of a specific quantile :math:`q` from the predicted value distribution of a stochastic/probabilistic `pred_series` containing N samples. The main difference to the Quantile Loss (QL) is that QR computes the quantile and loss on the aggregate of all sample values summed up along the time axis (QL computes the quantile and loss per time step). For the true series :math:`y` and predicted stochastic/probabilistic series (containing N samples) :math:`\\hat{y}` of of shape :math:`T \\times N`, it is computed per column/component as: .. math:: 2 \\frac{QL(Z, \\hat{Z}_q)}{Z}, where :math:`QL` is the Quantile Loss (:func:`~darts.metrics.metrics.ql`), :math:`Z = \\sum_{t=1}^{T} y_t` is the sum of all target/actual values, :math:`\\hat{Z} = \\sum_{t=1}^{T} \\hat{y}_t` is the sum of all predicted samples along the time axis, and :math:`\\hat{Z}_q` is the quantile :math:`q` of that sum. Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. q The quantile (float [0, 1]) of interest for the risk evaluation. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ if not pred_series.is_stochastic: raise_log( ValueError( "quantile risk (qr) should only be computed for stochastic predicted TimeSeries." ), logger=logger, ) z_true, z_hat = _get_values_or_raise( actual_series, pred_series, intersect, stochastic_quantile=None, remove_nan_union=True, ) z_true = np.nansum(z_true, axis=TIME_AX) z_hat = np.nansum( z_hat, axis=TIME_AX ) # aggregate all individual sample realizations z_hat_rho = np.quantile( z_hat, q=q, axis=1 ) # get the quantile from aggregated samples # quantile loss errors = z_true - z_hat_rho losses = 2 * np.maximum((q - 1) * errors, q * errors) return losses / z_true
[docs]@multi_ts_support @multivariate_support def ql( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], q: float = 0.5, intersect: bool = True, *, time_reduction: Optional[Callable[..., np.ndarray]] = None, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Quantile Loss (QL). Also known as Pinball Loss. QL is a metric that quantifies the accuracy of a specific quantile :math:`q` from the predicted value distribution of a stochastic/probabilistic `pred_series` containing N samples. QL computes the quantile of all sample values and the loss per time step. For the true series :math:`y` and predicted stochastic/probabilistic series (containing N samples) :math:`\\hat{y}` of of shape :math:`T \\times N`, it is computed per column/component and time step :math:`t` as: .. math:: 2 \\max((q - 1) (y_t - \\hat{y}_{t,q}), q (y_t - \\hat{y}_{t,q})), where :math:`\\hat{y}_{t,q}` is the quantile :math:`q` of all predicted sample values at time :math:`t`. The factor `2` makes the loss more interpretable, as for `q=0.5` the loss is identical to the Absolute Error (:func:`~darts.metrics.metrics.ae`). Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. q The quantile (float [0, 1]) of interest for the loss. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. time_reduction Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the time axis. If `None`, will return a metric per time step. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - a sequence (list) of uni/multivariate series with `series_reduction`, `component_reduction` and `time_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n time steps, n components) without time and component reductions. For: - single multivariate series and at least `component_reduction=None`. - single uni/multivariate series and at least `time_reduction=None`. - a sequence of uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ if not pred_series.is_stochastic: raise_log( ValueError( "quantile/pinball loss (ql) should only be computed for " "stochastic predicted TimeSeries." ), logger=logger, ) y_true, y_pred = _get_values_or_raise( actual_series, pred_series, intersect, stochastic_quantile=q, remove_nan_union=True, ) errors = y_true - y_pred losses = 2.0 * np.maximum((q - 1) * errors, q * errors) return losses
[docs]@multi_ts_support @multivariate_support def mql( actual_series: Union[TimeSeries, Sequence[TimeSeries]], pred_series: Union[TimeSeries, Sequence[TimeSeries]], q: float = 0.5, intersect: bool = True, *, component_reduction: Optional[Callable[[np.ndarray], float]] = np.nanmean, series_reduction: Optional[Callable[[np.ndarray], Union[float, np.ndarray]]] = None, n_jobs: int = 1, verbose: bool = False, ) -> METRIC_OUTPUT_TYPE: """Mean Quantile Loss (MQL). Also known as Pinball Loss. QL is a metric that quantifies the accuracy of a specific quantile :math:`q` from the predicted value distribution of a stochastic/probabilistic `pred_series` containing N samples. MQL first computes the quantile of all sample values and the loss per time step, and then takes the mean over the time axis. For the true series :math:`y` and predicted stochastic/probabilistic series (containing N samples) :math:`\\hat{y}` of of shape :math:`T \\times N`, it is computed per column/component as: .. math:: 2 \\frac{1}{T}\\sum_{t=1}^T{\\max((q - 1) (y_t - \\hat{y}_{t,q}), q (y_t - \\hat{y}_{t,q}))}, where :math:`\\hat{y}_{t,q}` is the quantile :math:`q` of all predicted sample values at time :math:`t`. The factor `2` makes the loss more interpretable, as for `q=0.5` the loss is identical to the Mean Absolute Error (:func:`~darts.metrics.metrics.mae`). Parameters ---------- actual_series The (sequence of) actual series. pred_series The (sequence of) predicted series. q The quantile (float [0, 1]) of interest for the loss. intersect For time series that are overlapping in time without having the same time index, setting `True` will consider the values only over their common time interval (intersection in time). component_reduction Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray` of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `1` corresponding to the component axis. If `None`, will return a metric per component. series_reduction Optionally, a function to aggregate the metrics over the series axis. It must reduce a `np.ndarray` of shape `(s, t, c)` to a `np.ndarray` of shape `(t, c)` The function takes as input a ``np.ndarray`` and a parameter named `axis`, and returns the reduced array. The `axis` receives value `0` corresponding to the series axis. If `None`, will return a metric per series. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors. verbose Optionally, whether to print operations progress Returns ------- float A single metric score for: - single univariate series. - single multivariate series with `component_reduction`. - sequence (list) of uni/multivariate series with `series_reduction` and `component_reduction`. np.ndarray A numpy array of metric scores. The array has shape (n components,) without component reduction. For: - single multivariate series and at least `component_reduction=None`. - sequence of uni/multivariate series including `series_reduction` and `component_reduction=None`. List[float] Same as for type `float` but for a sequence of series. List[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. """ return np.nanmean( _get_wrapped_metric(ql)( actual_series, pred_series, q=q, intersect=intersect, ), axis=TIME_AX, )