# Source code for darts.ad.anomaly_model.forecasting_am

"""
Forecasting Anomaly Model
-------------------------

A `ForecastingAnomalyModel` wraps around a Darts forecasting model and one or several anomaly
scorer(s) to compute anomaly scores by comparing how actuals deviate from the model's forecasts.
"""

# TODO:
#     - set the default value of `start` to its minimal value (waiting for the release of
#       `historical_forecasts`)
import sys
from typing import Dict, Optional, Sequence, Union

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self
try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal

import pandas as pd

from darts.ad.anomaly_model.anomaly_model import AnomalyModel
from darts.ad.scorers.scorers import AnomalyScorer
from darts.logging import get_logger, raise_log
from darts.models.forecasting.forecasting_model import GlobalForecastingModel
from darts.timeseries import TimeSeries

logger = get_logger(__name__)


class ForecastingAnomalyModel(AnomalyModel):
    def __init__(
        self,
        model: GlobalForecastingModel,
        scorer: Union[AnomalyScorer, Sequence[AnomalyScorer]],
    ):
        """Forecasting-based Anomaly Detection Model

        The forecasting model must be a `GlobalForecastingModel` that may or may not be already fitted.
        The underlying assumption is that `model` should be able to accurately forecast the series in
        the absence of anomalies. For this reason, it is recommended to either provide a model that has
        already been fitted and evaluated to work appropriately on a series without anomalies, or to
        ensure that a simple call to the :func:`fit()` method of the model will be sufficient to train
        it to satisfactory performance on a series without anomalies.

        The pre-trained model will be used to generate forecasts when calling :func:`score()`.
        Calling :func:`fit()` on the anomaly model will fit the underlying forecasting model only if
        `allow_model_training` is set to `True` upon calling `fit()`. In addition, calling
        :func:`fit()` will also fit the fittable scorers, if any.

        Parameters
        ----------
        model
            An instance of a Darts forecasting model.
        scorer
            One or multiple scorer(s) that will be used to compare the actual and predicted time
            series in order to obtain an anomaly score `TimeSeries`. If a list of `N` scorers is
            given, the anomaly model will call each one of the scorers and output a list of `N`
            anomaly scores `TimeSeries`.

        Raises
        ------
        ValueError
            If `model` is not a `GlobalForecastingModel` (reported through `raise_log`).
        """
        if not isinstance(model, GlobalForecastingModel):
            raise_log(
                ValueError("`model` must be a Darts `GlobalForecastingModel`."),
                logger=logger,
            )
        # keep a direct handle on the forecasting model; it is also handed to the parent class
        self.model = model
        super().__init__(model=model, scorer=scorer)

    def fit(
        self,
        series: Union[TimeSeries, Sequence[TimeSeries]],
        past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
        future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
        allow_model_training: bool = False,
        forecast_horizon: int = 1,
        start: Optional[Union[pd.Timestamp, float, int]] = None,
        start_format: Literal["position", "value"] = "value",
        num_samples: int = 1,
        verbose: bool = False,
        show_warnings: bool = True,
        enable_optimization: bool = True,
        **model_fit_kwargs,
    ) -> Self:
        """Fit the underlying forecasting model (if applicable) and the fittable scorers, if any.

        Train the forecasting model (if not already fitted and `allow_model_training` is `True`) and
        the fittable scorer(s) on the given time series.

        We use the trained forecasting model to compute historical forecasts for the input `series`.
        The scorer(s) are then trained on these forecasts along with the input `series`.

        Parameters
        ----------
        series
            The (sequence of) series to train on (generally assumed to be anomaly-free).
        past_covariates
            Optionally, a (sequence of) past-observed covariate series or sequence of series. This
            applies only to models that support past covariates.
        future_covariates
            Optionally, a (sequence of) future-known covariate series or sequence of series. This
            applies only to models that support future covariates.
        allow_model_training
            Whether the forecasting model should be fitted on the given series. If `False`, the
            model must already be fitted.
        forecast_horizon
            The forecast horizon for the predictions.
        start
            The first point of time at which a prediction is computed for a future time.
            This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
            In the case of `float`, the parameter will be treated as the proportion of the time
            series that should lie before the first prediction point.
            In the case of `int`, the parameter will be treated as an integer index to the time
            index of `series` that will be used as first prediction time.
            In case of `pandas.Timestamp`, this time stamp will be used to determine the first
            prediction time directly.
        start_format
            Defines the `start` format. Only effective when `start` is an integer and `series` is
            indexed with a `pd.RangeIndex`.
            If set to 'position', `start` corresponds to the index position of the first predicted
            point and can range from `(-len(series), len(series) - 1)`.
            If set to 'value', `start` corresponds to the index value/label of the first predicted
            point. Will raise an error if the value is not in `series`' index. Default: `'value'`
        num_samples
            Number of times a prediction is sampled from a probabilistic model. Should be left set
            to 1 for deterministic models.
        verbose
            Whether to print progress.
        show_warnings
            Whether to show warnings related to historical forecasts optimization, or parameters
            `start` and `train_length`.
        enable_optimization
            Whether to use the optimized version of historical_forecasts when supported and
            available.
        model_fit_kwargs
            Parameters to be passed on to the forecast model `fit()` method.

        Returns
        -------
        self
            Fitted model
        """
        return super().fit(
            series=series,
            past_covariates=past_covariates,
            future_covariates=future_covariates,
            allow_model_training=allow_model_training,
            forecast_horizon=forecast_horizon,
            start=start,
            start_format=start_format,
            num_samples=num_samples,
            verbose=verbose,
            show_warnings=show_warnings,
            enable_optimization=enable_optimization,
            **model_fit_kwargs,
        )

    def score(
        self,
        series: Union[TimeSeries, Sequence[TimeSeries]],
        past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
        future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
        forecast_horizon: int = 1,
        start: Optional[Union[pd.Timestamp, float, int]] = None,
        start_format: Literal["position", "value"] = "value",
        num_samples: int = 1,
        verbose: bool = False,
        show_warnings: bool = True,
        enable_optimization: bool = True,
        return_model_prediction: bool = False,
    ) -> Union[TimeSeries, Sequence[TimeSeries], Sequence[Sequence[TimeSeries]]]:
        """Compute anomaly score(s) for the given series.

        Predicts the given target time series with the forecasting model, and applies the scorer(s)
        on the prediction and the target input time series.

        Parameters
        ----------
        series
            The (sequence of) series to score on.
        past_covariates
            Optionally, a (sequence of) past-observed covariate series or sequence of series. This
            applies only to models that support past covariates.
        future_covariates
            Optionally, a (sequence of) future-known covariate series or sequence of series. This
            applies only to models that support future covariates.
        forecast_horizon
            The forecast horizon for the predictions.
        start
            The first point of time at which a prediction is computed for a future time.
            This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
            In the case of `float`, the parameter will be treated as the proportion of the time
            series that should lie before the first prediction point.
            In the case of `int`, the parameter will be treated as an integer index to the time
            index of `series` that will be used as first prediction time.
            In case of `pandas.Timestamp`, this time stamp will be used to determine the first
            prediction time directly.
        start_format
            Defines the `start` format. Only effective when `start` is an integer and `series` is
            indexed with a `pd.RangeIndex`.
            If set to 'position', `start` corresponds to the index position of the first predicted
            point and can range from `(-len(series), len(series) - 1)`.
            If set to 'value', `start` corresponds to the index value/label of the first predicted
            point. Will raise an error if the value is not in `series`' index. Default: `'value'`
        num_samples
            Number of times a prediction is sampled from a probabilistic model. Should be left set
            to 1 for deterministic models.
        verbose
            Whether to print progress.
        show_warnings
            Whether to show warnings related to historical forecasts optimization, or parameters
            `start` and `train_length`.
        enable_optimization
            Whether to use the optimized version of historical_forecasts when supported and
            available.
        return_model_prediction
            Whether to return the forecasting model prediction along with the anomaly scores.

        Returns
        -------
        TimeSeries
            A single `TimeSeries` for a single `series` with a single anomaly scorer.
        Sequence[TimeSeries]
            A sequence of `TimeSeries` for:

            - a single `series` with multiple anomaly scorers.
            - a sequence of `series` with a single anomaly scorer.
        Sequence[Sequence[TimeSeries]]
            A sequence of sequences of `TimeSeries` for a sequence of `series` and multiple anomaly
            scorers. The outer sequence is over the series, and inner sequence is over the scorers.
        """
        return super().score(
            series=series,
            past_covariates=past_covariates,
            future_covariates=future_covariates,
            forecast_horizon=forecast_horizon,
            start=start,
            start_format=start_format,
            num_samples=num_samples,
            verbose=verbose,
            show_warnings=show_warnings,
            enable_optimization=enable_optimization,
            return_model_prediction=return_model_prediction,
        )

    def predict_series(
        self,
        series: Sequence[TimeSeries],
        past_covariates: Optional[Sequence[TimeSeries]] = None,
        future_covariates: Optional[Sequence[TimeSeries]] = None,
        forecast_horizon: int = 1,
        start: Optional[Union[pd.Timestamp, float, int]] = None,
        start_format: Literal["position", "value"] = "value",
        num_samples: int = 1,
        verbose: bool = False,
        show_warnings: bool = True,
        enable_optimization: bool = True,
    ) -> Sequence[TimeSeries]:
        """Computes the historical forecasts that would have been obtained by the underlying
        forecasting model on `series`.

        The forecasts are generated with the already fitted model: `historical_forecasts()` is
        always called with `retrain=False`, `stride=1` and `last_points_only=True`.

        Parameters
        ----------
        series
            The sequence of series to score on.
        past_covariates
            Optionally, a sequence of past-observed covariate series or sequence of series. This
            applies only to models that support past covariates.
        future_covariates
            Optionally, a sequence of future-known covariate series or sequence of series. This
            applies only to models that support future covariates.
        forecast_horizon
            The forecast horizon for the predictions.
        start
            The first point of time at which a prediction is computed for a future time.
            This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
            In the case of `float`, the parameter will be treated as the proportion of the time
            series that should lie before the first prediction point.
            In the case of `int`, the parameter will be treated as an integer index to the time
            index of `series` that will be used as first prediction time.
            In case of `pandas.Timestamp`, this time stamp will be used to determine the first
            prediction time directly.
        start_format
            Defines the `start` format. Only effective when `start` is an integer and `series` is
            indexed with a `pd.RangeIndex`.
            If set to 'position', `start` corresponds to the index position of the first predicted
            point and can range from `(-len(series), len(series) - 1)`.
            If set to 'value', `start` corresponds to the index value/label of the first predicted
            point. Will raise an error if the value is not in `series`' index. Default: `'value'`
        num_samples
            Number of times a prediction is sampled from a probabilistic model. Should be left set
            to 1 for deterministic models.
        verbose
            Whether to print progress.
        show_warnings
            Whether to show warnings related to historical forecasts optimization, or parameters
            `start` and `train_length`.
        enable_optimization
            Whether to use the optimized version of historical_forecasts when supported and
            available.

        Returns
        -------
        Sequence[TimeSeries]
            A sequence of `TimeSeries` with the historical forecasts for each series (with
            `last_points_only=True`).

        Raises
        ------
        ValueError
            If the underlying forecasting model has not been fitted yet (reported through
            `raise_log`).
        """
        # forecasts can only be produced by a model that was fitted beforehand
        if not self.model._fit_called:
            raise_log(
                ValueError(
                    f"Forecasting `model` {self.model} has not been trained yet. Call `fit()` before."
                ),
                logger=logger,
            )
        return self.model.historical_forecasts(
            series,
            past_covariates=past_covariates,
            future_covariates=future_covariates,
            forecast_horizon=forecast_horizon,
            stride=1,
            retrain=False,
            last_points_only=True,
            start=start,
            start_format=start_format,
            num_samples=num_samples,
            verbose=verbose,
            show_warnings=show_warnings,
            enable_optimization=enable_optimization,
        )

    def eval_metric(
        self,
        anomalies: Union[TimeSeries, Sequence[TimeSeries]],
        series: Union[TimeSeries, Sequence[TimeSeries]],
        past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
        future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
        forecast_horizon: int = 1,
        start: Optional[Union[pd.Timestamp, float, int]] = None,
        start_format: Literal["position", "value"] = "value",
        num_samples: int = 1,
        verbose: bool = False,
        show_warnings: bool = True,
        enable_optimization: bool = True,
        metric: Literal["AUC_ROC", "AUC_PR"] = "AUC_ROC",
    ) -> Union[
        Dict[str, float],
        Dict[str, Sequence[float]],
        Sequence[Dict[str, float]],
        Sequence[Dict[str, Sequence[float]]],
    ]:
        """Compute the accuracy of the anomaly scores computed by the model.

        Predicts the `series` with the forecasting model, and applies the scorer(s) on the predicted
        time series and the given target time series. Returns the score(s) of an agnostic threshold
        metric, based on the anomaly score given by the scorer(s).

        Parameters
        ----------
        anomalies
            The (sequence of) ground truth binary anomaly series (`1` if it is an anomaly and `0`
            if not).
        series
            The (sequence of) series to predict anomalies on.
        past_covariates
            Optionally, a (sequence of) past-observed covariate series or sequence of series. This
            applies only to models that support past covariates.
        future_covariates
            Optionally, a (sequence of) future-known covariate series or sequence of series. This
            applies only to models that support future covariates.
        forecast_horizon
            The forecast horizon for the predictions.
        start
            The first point of time at which a prediction is computed for a future time.
            This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
            In the case of `float`, the parameter will be treated as the proportion of the time
            series that should lie before the first prediction point.
            In the case of `int`, the parameter will be treated as an integer index to the time
            index of `series` that will be used as first prediction time.
            In case of `pandas.Timestamp`, this time stamp will be used to determine the first
            prediction time directly.
        start_format
            Defines the `start` format. Only effective when `start` is an integer and `series` is
            indexed with a `pd.RangeIndex`.
            If set to 'position', `start` corresponds to the index position of the first predicted
            point and can range from `(-len(series), len(series) - 1)`.
            If set to 'value', `start` corresponds to the index value/label of the first predicted
            point. Will raise an error if the value is not in `series`' index. Default: `'value'`
        num_samples
            Number of times a prediction is sampled from a probabilistic model. Should be left set
            to 1 for deterministic models.
        verbose
            Whether to print progress.
        show_warnings
            Whether to show warnings related to historical forecasts optimization, or parameters
            `start` and `train_length`.
        enable_optimization
            Whether to use the optimized version of historical_forecasts when supported and
            available.
        metric
            The name of the metric function to use. Must be one of "AUC_ROC" (Area Under the
            Receiver Operating Characteristic Curve) and "AUC_PR" (Average Precision from scores).
            Default: "AUC_ROC".

        Returns
        -------
        Dict[str, float]
            A dictionary with the resulting metrics for single univariate `series`, with keys
            representing the anomaly scorer(s), and values representing the metric values.
        Dict[str, Sequence[float]]
            Same as for `Dict[str, float]` but for multivariate `series`, and anomaly scorers
            that treat series components/columns independently (by nature of the scorer or if
            `component_wise=True`).
        Sequence[Dict[str, float]]
            Same as for `Dict[str, float]` but for a sequence of univariate series.
        Sequence[Dict[str, Sequence[float]]]
            Same as for `Dict[str, float]` but for a sequence of multivariate series.
        """
        return super().eval_metric(
            anomalies=anomalies,
            series=series,
            past_covariates=past_covariates,
            future_covariates=future_covariates,
            forecast_horizon=forecast_horizon,
            start=start,
            start_format=start_format,
            num_samples=num_samples,
            verbose=verbose,
            show_warnings=show_warnings,
            enable_optimization=enable_optimization,
            metric=metric,
        )

    def show_anomalies(
        self,
        series: TimeSeries,
        past_covariates: Optional[TimeSeries] = None,
        future_covariates: Optional[TimeSeries] = None,
        forecast_horizon: int = 1,
        start: Optional[Union[pd.Timestamp, float, int]] = None,
        start_format: Literal["position", "value"] = "value",
        num_samples: int = 1,
        verbose: bool = False,
        show_warnings: bool = True,
        enable_optimization: bool = True,
        anomalies: Optional[TimeSeries] = None,
        names_of_scorers: Optional[Union[str, Sequence[str]]] = None,
        title: Optional[str] = None,
        metric: Optional[Literal["AUC_ROC", "AUC_PR"]] = None,
        **score_kwargs,
    ):
        """Plot the results of the anomaly model.

        Computes the score on the given series input and shows the different anomaly scores with
        respect to time.

        The plot will be composed of the following:

        - the series itself with the output of the forecasting model.
        - the anomaly score for each scorer. The scorers with different windows will be separated.
        - the actual anomalies, if given.

        It is possible to:

        - add a title to the figure with the parameter `title`
        - give personalized names for the scorers with `names_of_scorers`
        - show the results of a metric for each anomaly score (AUC_ROC or AUC_PR), if the actual
          anomalies are provided.

        Parameters
        ----------
        series
            The series to visualize anomalies from.
        past_covariates
            Optionally, a past-observed covariate series or sequence of series. This applies only
            to models that support past covariates.
        future_covariates
            Optionally, a future-known covariate series or sequence of series. This applies only
            to models that support future covariates.
        forecast_horizon
            The forecast horizon for the predictions.
        start
            The first point of time at which a prediction is computed for a future time.
            This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
            In the case of `float`, the parameter will be treated as the proportion of the time
            series that should lie before the first prediction point.
            In the case of `int`, the parameter will be treated as an integer index to the time
            index of `series` that will be used as first prediction time.
            In case of `pandas.Timestamp`, this time stamp will be used to determine the first
            prediction time directly.
        start_format
            Defines the `start` format. Only effective when `start` is an integer and `series` is
            indexed with a `pd.RangeIndex`.
            If set to 'position', `start` corresponds to the index position of the first predicted
            point and can range from `(-len(series), len(series) - 1)`.
            If set to 'value', `start` corresponds to the index value/label of the first predicted
            point. Will raise an error if the value is not in `series`' index. Default: `'value'`
        num_samples
            Number of times a prediction is sampled from a probabilistic model. Should be left set
            to 1 for deterministic models.
        verbose
            Whether to print progress.
        show_warnings
            Whether to show warnings related to historical forecasts optimization, or parameters
            `start` and `train_length`.
        enable_optimization
            Whether to use the optimized version of historical_forecasts when supported and
            available.
        anomalies
            The ground truth of the anomalies (1 if it is an anomaly and 0 if not).
        names_of_scorers
            Name of the scores. Must be a list of length equal to the number of scorers in the
            anomaly_model.
        title
            Title of the figure.
        metric
            Optionally, the name of the metric function to use. Must be one of "AUC_ROC" (Area
            Under the Receiver Operating Characteristic Curve) and "AUC_PR" (Average Precision from
            scores). Default: `None`.
        score_kwargs
            parameters for the `score()` method.
        """
        # forecasting-specific arguments are bundled and forwarded as `predict_kwargs`
        predict_kwargs = {
            "past_covariates": past_covariates,
            "future_covariates": future_covariates,
            "forecast_horizon": forecast_horizon,
            "start": start,
            "start_format": start_format,
            "num_samples": num_samples,
            "verbose": verbose,
            "show_warnings": show_warnings,
            "enable_optimization": enable_optimization,
        }
        return super().show_anomalies(
            series=series,
            anomalies=anomalies,
            predict_kwargs=predict_kwargs,
            names_of_scorers=names_of_scorers,
            title=title,
            metric=metric,
            **score_kwargs,
        )

    def _fit_core(
        self,
        series: Sequence[TimeSeries],
        past_covariates: Optional[Sequence[TimeSeries]] = None,
        future_covariates: Optional[Sequence[TimeSeries]] = None,
        allow_model_training: bool = False,
        forecast_horizon: int = 1,
        # NOTE(review): this default (0.5) differs from the public `fit()` default (`None`);
        # confirm whether that is intended.
        start: Union[pd.Timestamp, float, int] = 0.5,
        start_format: Literal["position", "value"] = "value",
        num_samples: int = 1,
        verbose: bool = False,
        show_warnings: bool = True,
        enable_optimization: bool = True,
        **model_fit_kwargs,
    ):
        """Fit the forecasting model (if applicable) and scorers.

        When `allow_model_training` is `True`, the forecasting model is fitted on `series` (and the
        covariates) with `model_fit_kwargs`. Otherwise the model must already have been fitted.
        If the scorers are trainable, historical forecasts are generated with
        :func:`predict_series()` and the scorers are fitted on `series` and these forecasts.

        Raises
        ------
        ValueError
            If `allow_model_training=False` and the forecasting model has not been fitted yet
            (reported through `raise_log`).
        """
        # fit forecasting model
        if allow_model_training:
            self.model._fit_wrapper(
                series=series,
                past_covariates=past_covariates,
                future_covariates=future_covariates,
                **model_fit_kwargs,
            )
        elif not self.model._fit_called:
            raise_log(
                ValueError(
                    f"With `allow_model_training=False`, the underlying model `{self.model.__class__.__name__}` "
                    "must have already been trained. Either train it before or set `allow_model_training=True` "
                    "(model will be trained from scratch on the provided series)."
                ),
                logger=logger,
            )

        # generate the historical_forecast() prediction of the model on the train set
        if self.scorers_are_trainable:
            historical_forecasts = self.predict_series(
                series=series,
                past_covariates=past_covariates,
                future_covariates=future_covariates,
                forecast_horizon=forecast_horizon,
                start=start,
                start_format=start_format,
                num_samples=num_samples,
                verbose=verbose,
                show_warnings=show_warnings,
                enable_optimization=enable_optimization,
            )
            # fit the scorers; kept inside the guard because `historical_forecasts` only exists
            # when there is at least one trainable scorer
            self._fit_scorers(series, historical_forecasts)