Source code for darts.models.forecasting.conformal_models

"""
Conformal Models
----------------

A collection of conformal prediction models for pre-trained global forecasting models.
"""

import copy
import math
import os
import sys
from abc import ABC, abstractmethod
from collections.abc import Sequence
from typing import Any, BinaryIO, Callable, Optional, Union

from darts.utils.likelihood_models.base import (
    Likelihood,
    LikelihoodType,
    quantile_names,
)

try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

import numpy as np
import pandas as pd

from darts import TimeSeries, metrics
from darts.dataprocessing.pipeline import Pipeline
from darts.dataprocessing.transformers import BaseDataTransformer
from darts.logging import get_logger, raise_log
from darts.metrics.metrics import METRIC_TYPE
from darts.models.forecasting.forecasting_model import GlobalForecastingModel
from darts.utils import _build_tqdm_iterator, _with_sanity_checks
from darts.utils.historical_forecasts.utils import (
    _adjust_historical_forecasts_time_index,
)
from darts.utils.timeseries_generation import _build_forecast_series
from darts.utils.ts_utils import (
    SeriesType,
    get_series_seq_type,
    series2seq,
)
from darts.utils.utils import (
    TORCH_AVAILABLE,
    _check_quantiles,
    generate_index,
    n_steps_between,
    random_method,
    sample_from_quantiles,
)

if TORCH_AVAILABLE:
    from darts.models.forecasting.torch_forecasting_model import TorchForecastingModel
else:
    TorchForecastingModel = None

logger = get_logger(__name__)


[docs]class ConformalModel(GlobalForecastingModel, ABC): @random_method def __init__( self, model: GlobalForecastingModel, quantiles: list[float], symmetric: bool = True, cal_length: Optional[int] = None, cal_stride: int = 1, cal_num_samples: int = 500, random_state: Optional[int] = None, ): """Base Conformal Prediction Model. Base class for any conformal prediction model. A conformal model calibrates the predictions from any pre-trained global forecasting model. It does not have to be trained, and can generate calibrated forecasts directly using the underlying trained forecasting model. Since it is a probabilistic model, you can generate forecasts in two ways (when calling `predict()`, `historical_forecasts()`, ...): - Predict the calibrated quantile intervals directly: Pass parameters `predict_likelihood_parameters=True`, and `num_samples=1` to the forecast method. - Predict stochastic samples from the calibrated quantile intervals: Pass parameters `predict_likelihood_parameters=False`, and `num_samples>>1` to the forecast method. Conformal models can be applied to any of Darts' global forecasting model, as long as the model has been fitted before. In general the workflow of the models to produce one calibrated forecast/prediction is as follows: - Extract a calibration set: The calibration set for each conformal forecast is automatically extracted from the most recent past of your input series relative to the forecast start point. The number of calibration examples (forecast errors / non-conformity scores) to consider can be defined at model creation with parameter `cal_length`. Note that when using `cal_stride>1`, a longer history is required since the calibration examples are generated with stridden historical forecasts. - Generate historical forecasts on the calibration set (using the forecasting model) with a stride `cal_stride`. - Compute the errors/non-conformity scores (specific to each conformal model) on these historical forecasts - Compute the quantile values from the errors / non-conformity scores (using our desired quantiles set at model creation with parameter `quantiles`). - Compute the conformal prediction: Using these quantile values, add calibrated intervals to (or adjust the existing intervals of) the forecasting model's predictions. Some notes: - When computing `historical_forecasts()`, `backtest()`, `residuals()`, ... the above is applied for each forecast (the forecasting model's historical forecasts are only generated once for efficiency). - For multi-horizon forecasts, the above is applied for each step in the horizon separately. Parameters ---------- model A pre-trained global forecasting model. See the list of models `here <https://unit8co.github.io/darts/#forecasting-models>`_. quantiles A list of quantiles centered around the median `q=0.5` to use. For example quantiles [0.1, 0.2, 0.5, 0.8 0.9] correspond to two intervals with (0.9 - 0.1) = 80%, and (0.8 - 0.2) 60% coverage around the median (model forecast). symmetric Whether to use symmetric non-conformity scores. If `False`, uses asymmetric scores (individual scores for lower- and upper quantile interval bounds). cal_length The number of past forecast errors / non-conformity scores to use as calibration for each conformal forecast (and each step in the horizon). If `None`, considers all scores. cal_stride The stride to apply when computing the historical forecasts and non-conformity scores on the calibration set. 
The actual conformal forecasts can have a different stride given with parameter `stride` in downstream tasks (e.g. historical forecasts, backtest, ...) cal_num_samples The number of samples to generate for each calibration forecast (if `model` is a probabilistic forecasting model). The non-conformity scores are computed on the quantile values of these forecasts (using quantiles `quantiles`). Uses `1` for deterministic models. The actual conformal forecasts can have a different number of samples given with parameter `num_samples` in downstream tasks (e.g. predict, historical forecasts, ...). random_state Control the randomness of probabilistic conformal forecasts (sample generation) across different runs. """ if not isinstance(model, GlobalForecastingModel) or not model._fit_called: raise_log( ValueError("`model` must be a pre-trained `GlobalForecastingModel`."), logger=logger, ) _check_quantiles(quantiles) if cal_length is not None and cal_length < 1: raise_log( ValueError("`cal_length` must be `>=1` or `None`."), logger=logger ) if cal_stride < 1: raise_log(ValueError("`cal_stride` must be `>=1`."), logger=logger) if cal_num_samples < 1: raise_log(ValueError("`cal_num_samples` must be `>=1`."), logger=logger) super().__init__(add_encoders=None) # quantiles and interval setup self.quantiles = np.array(quantiles) self.idx_median = quantiles.index(0.5) self.q_interval = [ (q_l, q_h) for q_l, q_h in zip( quantiles[: self.idx_median], quantiles[self.idx_median + 1 :][::-1] ) ] self.interval_range = np.array([ q_high - q_low for q_low, q_high in self.q_interval ]) if symmetric: # symmetric considers both tails together self.interval_range_sym = copy.deepcopy(self.interval_range) else: # asymmetric considers tails separately self.interval_range_sym = 1 - (1 - self.interval_range) / 2 self.symmetric = symmetric # model setup self.model = model self.cal_length = cal_length self.cal_stride = cal_stride self.cal_num_samples = ( cal_num_samples if model.supports_probabilistic_prediction else 1 ) self._likelihood = Likelihood( likelihood_type=LikelihoodType.Quantile, parameter_names=quantile_names(quantiles), ) self._fit_called = True
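# --- Illustrative example (editor's addition, not part of the darts source) ---
# A minimal sketch of wrapping a pre-trained forecasting model in a conformal model,
# mirroring the example from `save()` further below. `ConformalNaiveModel` is one
# concrete subclass of this abstract base class; the split sizes and parameter values
# are illustrative assumptions only.
from darts.datasets import AirPassengersDataset
from darts.models import ConformalNaiveModel, LinearRegressionModel

series = AirPassengersDataset().load()
train, cal = series[:-48], series[-48:]

# the forecasting model must be fitted before it is passed to the conformal model
forecasting_model = LinearRegressionModel(lags=12).fit(train)

# quantiles centered around the median; [0.1, 0.5, 0.9] gives one 80% interval
model = ConformalNaiveModel(
    model=forecasting_model,
    quantiles=[0.1, 0.5, 0.9],
    cal_length=24,  # use only the 24 most recent non-conformity scores
)
print(model.q_interval)      # [(0.1, 0.9)]
print(model.interval_range)  # [0.8]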
    def fit(
        self,
        series: Union[TimeSeries, Sequence[TimeSeries]],
        past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
        future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
        **kwargs,
    ) -> "ConformalModel":
        """Fit/train the underlying forecasting model on (potentially multiple) series.

        Optionally, one or multiple past and/or future covariates series can be provided as well, depending on the
        forecasting model used. The number of covariates series must match the number of target series.

        Notes
        -----
        Conformal models do not require calling `fit()`, since they use pre-trained global forecasting models. You
        can call `predict()` directly. Also, make sure that the input series passed to `predict()` serves as a
        calibration set and is not the same series the forecasting model was trained on with `fit()`.

        Parameters
        ----------
        series
            One or several target time series. The model will be trained to forecast these time series.
            The series may or may not be multivariate, but if multiple series are provided
            they must have the same number of components.
        past_covariates
            One or several past-observed covariate time series. These time series will not be forecast, but can
            be used by some models as an input. The covariate(s) may or may not be multivariate, but if multiple
            covariates are provided they must have the same number of components. If `past_covariates` is provided,
            it must contain the same number of series as `series`.
        future_covariates
            One or several future-known covariate time series. These time series will not be forecast, but can
            be used by some models as an input. The covariate(s) may or may not be multivariate, but if multiple
            covariates are provided they must have the same number of components. If `future_covariates` is provided,
            it must contain the same number of series as `series`.
        **kwargs
            Optional keyword arguments that will be passed to the underlying forecasting model's `fit()` method.

        Returns
        -------
        self
            Fitted model.
        """
        # does not have to be trained, but we allow it for unified API
        self.model.fit(
            series=series,
            past_covariates=past_covariates,
            future_covariates=future_covariates,
            **kwargs,
        )
        return self
[docs] def predict( self, n: int, series: Union[TimeSeries, Sequence[TimeSeries]] = None, past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, num_samples: int = 1, verbose: bool = False, predict_likelihood_parameters: bool = False, show_warnings: bool = True, **kwargs, ) -> Union[TimeSeries, Sequence[TimeSeries]]: """Forecasts calibrated quantile intervals (or samples from calibrated intervals) for `n` time steps after the end of the `series`. It is important that the input series for prediction correspond to a calibration set - a set different to the series that the underlying forecasting `model` was trained on. Since it is a probabilistic model, you can generate forecasts in two ways: - Predict the calibrated quantile intervals directly: Pass parameters `predict_likelihood_parameters=True`, and `num_samples=1` to the forecast method. - Predict stochastic samples from the calibrated quantile intervals: Pass parameters `predict_likelihood_parameters=False`, and `num_samples>>1` to the forecast method. Under the hood, the simplified workflow to produce one calibrated forecast/prediction for every step in the horizon `n` is as follows (note: `cal_length` and `cal_stride` can be set at model creation): - Extract a calibration set: The calibration set for each conformal forecast is automatically extracted from the most recent past of your input series relative to the forecast start point. The number of calibration examples (forecast errors / non-conformity scores) to consider can be defined at model creation with parameter `cal_length`. Note that when using `cal_stride>1`, a longer history is required since the calibration examples are generated with stridden historical forecasts. - Generate historical forecasts on the calibration set (using the forecasting model) with a stride `cal_stride`. - Compute the errors/non-conformity scores (specific to each conformal model) on these historical forecasts - Compute the quantile values from the errors / non-conformity scores (using our desired quantiles set at model creation with parameter `quantiles`). - Compute the conformal prediction: Using these quantile values, add calibrated intervals to (or adjust the existing intervals of) the forecasting model's predictions. Parameters ---------- n Forecast horizon - the number of time steps after the end of the series for which to produce predictions. series A series or sequence of series, representing the history of the target series whose future is to be predicted. Will use the past of this series for calibration. The series should not have any overlap with the series used to train the forecasting model. past_covariates Optionally, a (sequence of) past-observed covariate time series for every input time series in `series`. Their dimension must match that of the past covariates used for training. Will use this series for calibration. future_covariates Optionally, a (sequence of) future-known covariate time series for every input time series in `series`. Their dimension must match that of the past covariates used for training. Will use this series for calibration. num_samples Number of times a prediction is sampled from the calibrated quantile predictions using linear interpolation in-between the quantiles. For larger values, the sample distribution approximates the calibrated quantile predictions. verbose Whether to print the progress. 
predict_likelihood_parameters If set to `True`, generates the quantile predictions directly. Only supported with `num_samples = 1`. show_warnings Whether to show warnings related auto-regression and past covariates usage. **kwargs Optional keyword arguments that will passed to the underlying forecasting model's `predict()` and `historical_forecasts()` methods. Returns ------- Union[TimeSeries, Sequence[TimeSeries]] If `series` is not specified, this function returns a single time series containing the `n` next points after then end of the training series. If `series` is given and is a simple ``TimeSeries``, this function returns the `n` next points after the end of `series`. If `series` is given and is a sequence of several time series, this function returns a sequence where each element contains the corresponding `n` points forecasts. """ # call predict to verify that all series have required input times _ = self.model.predict( n=n, series=series, past_covariates=past_covariates, future_covariates=future_covariates, num_samples=self.cal_num_samples, verbose=verbose, predict_likelihood_parameters=False, show_warnings=show_warnings, **kwargs, ) series = series or self.model.training_series called_with_single_series = get_series_seq_type(series) == SeriesType.SINGLE series = series2seq(series) # generate only the required forecasts for calibration (including the last forecast which is the output of # `predict()`) cal_start, cal_start_format = _get_calibration_hfc_start( series=series, horizon=n, output_chunk_shift=self.output_chunk_shift, cal_length=self.cal_length, cal_stride=self.cal_stride, start="end", start_format="position", ) cal_hfcs = self.model.historical_forecasts( series=series, past_covariates=past_covariates, future_covariates=future_covariates, forecast_horizon=n, num_samples=self.cal_num_samples, start=cal_start, start_format=cal_start_format, stride=self.cal_stride, retrain=False, overlap_end=True, last_points_only=False, verbose=verbose, show_warnings=False, predict_likelihood_parameters=False, predict_kwargs=kwargs, ) cal_preds = self._calibrate_forecasts( series=series, forecasts=cal_hfcs, num_samples=num_samples, start="end", # uses last hist fc (output of `predict()`) start_format="position", forecast_horizon=n, stride=self.cal_stride, overlap_end=True, last_points_only=False, verbose=verbose, show_warnings=show_warnings, predict_likelihood_parameters=predict_likelihood_parameters, ) # convert historical forecasts output to simple forecast / prediction if called_with_single_series: return cal_preds[0][0] else: return [cp[0] for cp in cal_preds]
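# --- Illustrative example (editor's addition, not part of the darts source) ---
# The two prediction modes described in the docstring above, reusing the hypothetical
# `model` and `cal` objects from the sketch after `__init__`.
# 1) calibrated quantile values directly (one component per quantile)
pred_quantiles = model.predict(
    n=6,
    series=cal,
    predict_likelihood_parameters=True,
    num_samples=1,
)
# 2) stochastic samples drawn from the calibrated quantile intervals
pred_samples = model.predict(
    n=6,
    series=cal,
    predict_likelihood_parameters=False,
    num_samples=500,
)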
[docs] @_with_sanity_checks("_historical_forecasts_sanity_checks") def historical_forecasts( self, series: Union[TimeSeries, Sequence[TimeSeries]], past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, forecast_horizon: int = 1, num_samples: int = 1, train_length: Optional[int] = None, start: Optional[Union[pd.Timestamp, int]] = None, start_format: Literal["position", "value"] = "value", stride: int = 1, retrain: Union[bool, int, Callable[..., bool]] = True, overlap_end: bool = False, last_points_only: bool = True, verbose: bool = False, show_warnings: bool = True, predict_likelihood_parameters: bool = False, enable_optimization: bool = True, data_transformers: Optional[ dict[str, Union[BaseDataTransformer, Pipeline]] ] = None, fit_kwargs: Optional[dict[str, Any]] = None, predict_kwargs: Optional[dict[str, Any]] = None, sample_weight: Optional[Union[TimeSeries, Sequence[TimeSeries], str]] = None, ) -> Union[TimeSeries, list[TimeSeries], list[list[TimeSeries]]]: """Generates calibrated historical forecasts by simulating predictions at various points in time throughout the history of the provided (potentially multiple) `series`. This process involves retrospectively applying the model to different time steps, as if the forecasts were made in real-time at those specific moments. This allows for an evaluation of the model's performance over the entire duration of the series, providing insights into its predictive accuracy and robustness across different historical periods. Currently, conformal models only support the pre-trained historical forecasts mode (`retrain=False`). Parameters `retrain` and `train_length` are ignored. **Pre-trained Mode:** First, all historical forecasts are generated using the underlying pre-trained global forecasting model (see :meth:`ForecastingModel.historical_forecasts() <darts.models.forecasting.forecasting_model.ForecastingModel.historical_forecasts>` for more info). Then it repeatedly builds a calibration set by either expanding from the beginning of the historical forecasts or by using a fixed-length moving window with length `cal_length` (the start point can also be configured with `start` and `start_format`). The next forecast of length `forecast_horizon` is then calibrated on this calibration set. Subsequently, the end of the calibration set is moved forward by `stride` time steps, and the process is repeated. By default, with `last_points_only=True`, this method returns a single time series (or a sequence of time series when `series` is also a sequence of series) composed of the last point from each calibrated historical forecast. This time series will thus have a frequency of `series.freq * stride`. If `last_points_only=False`, it will instead return a list (or a sequence of lists) with all calibrated historical forecasts of length `forecast_horizon` and frequency `series.freq`. Parameters ---------- series A (sequence of) target time series used to successively compute the historical forecasts. Will use the past of this series for calibration. The series should not have any overlap with the series used to train the forecasting model. past_covariates Optionally, a (sequence of) past-observed covariate time series for every input time series in `series`. Their dimension must match that of the past covariates used for training. Will use this series for calibration. 
future_covariates Optionally, a (sequence of) future-known covariate time series for every input time series in `series`. Their dimension must match that of the past covariates used for training. Will use this series for calibration. forecast_horizon The forecast horizon for the predictions. num_samples Number of times a prediction is sampled from the calibrated quantile predictions using linear interpolation in-between the quantiles. For larger values, the sample distribution approximates the calibrated quantile predictions. train_length Currently ignored by conformal models. start Optionally, the first point in time at which a prediction is computed. This parameter supports: ``int``, ``pandas.Timestamp``, and ``None``. If an ``int``, it is either the index position of the first prediction point for `series` with a `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to the index position with `start_format="position"`. If a ``pandas.Timestamp``, it is the time stamp of the first prediction point. If ``None``, the first prediction point will automatically be set to: - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first predictable point is earlier than the first trainable point. - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`), or `retrain` is a ``Callable`` and the first trainable point is earlier than the first predictable point. - the first trainable point (given `train_length`) otherwise Note: If the model uses a shifted output (`output_chunk_shift > 0`), then the first predicted point is also shifted by `output_chunk_shift` points into the future. Note: Raises a ValueError if `start` yields a time outside the time index of `series`. Note: If `start` is outside the possible historical forecasting times, will ignore the parameter (default behavior with ``None``) and start at the first trainable/predictable point. start_format Defines the `start` format. If set to ``'position'``, `start` corresponds to the index position of the first predicted point and can range from `(-len(series), len(series) - 1)`. If set to ``'value'``, `start` corresponds to the index value/label of the first predicted point. Will raise an error if the value is not in `series`' index. Default: ``'value'``. stride The number of time steps between two consecutive predictions. Must be a round-multiple of `cal_stride` (set at model creation) and `>=cal_stride`. retrain Currently ignored by conformal models. overlap_end Whether the returned forecasts can go beyond the series' end or not. last_points_only Whether to return only the last point of each historical forecast. If set to ``True``, the method returns a single ``TimeSeries`` (for each time series in `series`) containing the successive point forecasts. Otherwise, returns a list of historical ``TimeSeries`` forecasts. verbose Whether to print the progress. show_warnings Whether to show warnings related to historical forecasts optimization, or parameters `start` and `train_length`. predict_likelihood_parameters If set to `True`, generates the quantile predictions directly. Only supported with `num_samples = 1`. enable_optimization Whether to use the optimized version of `historical_forecasts` when supported and available. Default: ``True``. data_transformers Optionally, a dictionary of `BaseDataTransformer` or `Pipeline` to apply to the corresponding series (possibles keys; "series", "past_covariates", "future_covariates"). 
If provided, all input series must be in the un-transformed space. For fittable transformer / pipeline: - if `retrain=True`, the data transformer re-fit on the training data at each historical forecast step (currently ignored by conformal models). - if `retrain=False`, the data transformer transforms the series once before all the forecasts. The fitted transformer is used to transform the input during both training and prediction. If the transformation is invertible, the forecasts will be inverse-transformed. fit_kwargs Currently ignored by conformal models. predict_kwargs Optionally, some additional arguments passed to the model `predict()` method. sample_weight Currently ignored by conformal models. Returns ------- TimeSeries A single historical forecast for a single `series` and `last_points_only=True`: it contains only the predictions at step `forecast_horizon` from all historical forecasts. list[TimeSeries] A list of historical forecasts for: - a sequence (list) of `series` and `last_points_only=True`: for each series, it contains only the predictions at step `forecast_horizon` from all historical forecasts. - a single `series` and `last_points_only=False`: for each historical forecast, it contains the entire horizon `forecast_horizon`. list[list[TimeSeries]] A list of lists of historical forecasts for a sequence of `series` and `last_points_only=False`. For each series, and historical forecast, it contains the entire horizon `forecast_horizon`. The outer list is over the series provided in the input sequence, and the inner lists contain the historical forecasts for each series. """ called_with_single_series = get_series_seq_type(series) == SeriesType.SINGLE series = series2seq(series) past_covariates = series2seq(past_covariates) future_covariates = series2seq(future_covariates) # generate only the required forecasts (if `start` is given, we have to start earlier to satisfy the # calibration set requirements) cal_start, cal_start_format = _get_calibration_hfc_start( series=series, horizon=forecast_horizon, output_chunk_shift=self.output_chunk_shift, cal_length=self.cal_length, cal_stride=self.cal_stride, start=start, start_format=start_format, ) hfcs = self.model.historical_forecasts( series=series, past_covariates=past_covariates, future_covariates=future_covariates, forecast_horizon=forecast_horizon, num_samples=self.cal_num_samples, start=cal_start, start_format=cal_start_format, stride=self.cal_stride, retrain=False, overlap_end=overlap_end, last_points_only=last_points_only, verbose=verbose, show_warnings=False, predict_likelihood_parameters=False, enable_optimization=enable_optimization, data_transformers=data_transformers, fit_kwargs=fit_kwargs, predict_kwargs=predict_kwargs, ) calibrated_forecasts = self._calibrate_forecasts( series=series, forecasts=hfcs, num_samples=num_samples, start=start, start_format=start_format, forecast_horizon=forecast_horizon, stride=stride, overlap_end=overlap_end, last_points_only=last_points_only, verbose=verbose, show_warnings=show_warnings, predict_likelihood_parameters=predict_likelihood_parameters, ) return ( calibrated_forecasts[0] if called_with_single_series else calibrated_forecasts )
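# --- Illustrative example (editor's addition, not part of the darts source) ---
# Calibrated historical forecasts on the calibration series from the earlier sketch.
hfcs = model.historical_forecasts(
    series=cal,
    forecast_horizon=3,
    last_points_only=False,  # return one full 3-step forecast per start point
    stride=1,                # must be a round-multiple of `cal_stride`
)
# `hfcs` is a list of TimeSeries, one per historical forecast start point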
[docs] def backtest( self, series: Union[TimeSeries, Sequence[TimeSeries]], past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, historical_forecasts: Optional[ Union[TimeSeries, Sequence[TimeSeries], Sequence[Sequence[TimeSeries]]] ] = None, forecast_horizon: int = 1, num_samples: int = 1, train_length: Optional[int] = None, start: Optional[Union[pd.Timestamp, int]] = None, start_format: Literal["position", "value"] = "value", stride: int = 1, retrain: Union[bool, int, Callable[..., bool]] = True, overlap_end: bool = False, last_points_only: bool = False, metric: Union[METRIC_TYPE, list[METRIC_TYPE]] = metrics.mape, reduction: Union[Callable[..., float], None] = np.mean, verbose: bool = False, show_warnings: bool = True, predict_likelihood_parameters: bool = False, enable_optimization: bool = True, data_transformers: Optional[ dict[str, Union[BaseDataTransformer, Pipeline]] ] = None, metric_kwargs: Optional[Union[dict[str, Any], list[dict[str, Any]]]] = None, fit_kwargs: Optional[dict[str, Any]] = None, predict_kwargs: Optional[dict[str, Any]] = None, sample_weight: Optional[Union[TimeSeries, Sequence[TimeSeries], str]] = None, ) -> Union[float, np.ndarray, list[float], list[np.ndarray]]: """Compute error values that the model produced for historical forecasts on (potentially multiple) `series`. If `historical_forecasts` are provided, the metric(s) (given by the `metric` function) is evaluated directly on all forecasts and actual values. The same `series` and `last_points_only` value must be passed that were used to generate the historical forecasts. Finally, the method returns an optional `reduction` (the mean by default) of all these metric scores. If `historical_forecasts` is ``None``, it first generates the historical forecasts with the parameters given below (see :meth:`ConformalModel.historical_forecasts() <darts.models.forecasting.conformal_models.ConformalModel.historical_forecasts>` for more info) and then evaluates as described above. The metric(s) can be further customized `metric_kwargs` (e.g. control the aggregation over components, time steps, multiple series, other required arguments such as `q` for quantile metrics, ...). Notes ----- Darts has several metrics to evaluate probabilistic forecasts. For conformal models, we recommend using quantile interval metrics (see `here <https://unit8co.github.io/darts/generated_api/darts.metrics.html>`_). You can specify which intervals to evaluate by setting `metric_kwargs={'q_interval': my_intervals}`. To check all intervals used by your conformal model `my_model`, you can set ``{'q_interval': my_model.q_interval}``. Parameters ---------- series A (sequence of) target time series used to successively compute the historical forecasts. Will use the past of this series for calibration. The series should not have any overlap with the series used to train the forecasting model. past_covariates Optionally, a (sequence of) past-observed covariate time series for every input time series in `series`. Their dimension must match that of the past covariates used for training. Will use this series for calibration. future_covariates Optionally, a (sequence of) future-known covariate time series for every input time series in `series`. Their dimension must match that of the past covariates used for training. Will use this series for calibration. 
historical_forecasts Optionally, the (or a sequence of / a sequence of sequences of) historical forecasts time series to be evaluated. Corresponds to the output of :meth:`historical_forecasts() <darts.models.forecasting.conformal_models.ConformalModel.historical_forecasts>`. The same `series` and `last_points_only` values must be passed that were used to generate the historical forecasts. If provided, will skip historical forecasting and ignore all parameters except `series`, `last_points_only`, `metric`, and `reduction`. forecast_horizon The forecast horizon for the predictions. num_samples Number of times a prediction is sampled from the calibrated quantile predictions using linear interpolation in-between the quantiles. For larger values, the sample distribution approximates the calibrated quantile predictions. train_length Currently ignored by conformal models. start Optionally, the first point in time at which a prediction is computed. This parameter supports: ``int``, ``pandas.Timestamp``, and ``None``. If an ``int``, it is either the index position of the first prediction point for `series` with a `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to the index position with `start_format="position"`. If a ``pandas.Timestamp``, it is the time stamp of the first prediction point. If ``None``, the first prediction point will automatically be set to: - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first predictable point is earlier than the first trainable point. - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`), or `retrain` is a ``Callable`` and the first trainable point is earlier than the first predictable point. - the first trainable point (given `train_length`) otherwise Note: If the model uses a shifted output (`output_chunk_shift > 0`), then the first predicted point is also shifted by `output_chunk_shift` points into the future. Note: Raises a ValueError if `start` yields a time outside the time index of `series`. Note: If `start` is outside the possible historical forecasting times, will ignore the parameter (default behavior with ``None``) and start at the first trainable/predictable point. start_format Defines the `start` format. If set to ``'position'``, `start` corresponds to the index position of the first predicted point and can range from `(-len(series), len(series) - 1)`. If set to ``'value'``, `start` corresponds to the index value/label of the first predicted point. Will raise an error if the value is not in `series`' index. Default: ``'value'``. stride The number of time steps between two consecutive predictions. retrain Currently ignored by conformal models. overlap_end Whether the returned forecasts can go beyond the series' end or not. last_points_only Whether to return only the last point of each historical forecast. If set to ``True``, the method returns a single ``TimeSeries`` (for each time series in `series`) containing the successive point forecasts. Otherwise, returns a list of historical ``TimeSeries`` forecasts. metric A metric function or a list of metric functions. Each metric must either be a Darts metric (see `here <https://unit8co.github.io/darts/generated_api/darts.metrics.html>`_), or a custom metric that has an identical signature as Darts' metrics, uses decorators :func:`~darts.metrics.metrics.multi_ts_support` and :func:`~darts.metrics.metrics.multi_ts_support`, and returns the metric score. 
reduction A function used to combine the individual error scores obtained when `last_points_only` is set to `False`. When providing several metric functions, the function will receive the argument `axis = 1` to obtain single value for each metric function. If explicitly set to `None`, the method will return a list of the individual error scores instead. Set to ``np.mean`` by default. verbose Whether to print the progress. show_warnings Whether to show warnings related to historical forecasts optimization, or parameters `start` and `train_length`. predict_likelihood_parameters If set to `True`, generates the quantile predictions directly. Only supported with `num_samples = 1`. enable_optimization Whether to use the optimized version of `historical_forecasts` when supported and available. Default: ``True``. data_transformers Optionally, a dictionary of `BaseDataTransformer` or `Pipeline` to apply to the corresponding series (possibles keys; "series", "past_covariates", "future_covariates"). If provided, all input series must be in the un-transformed space. For fittable transformer / pipeline: - if `retrain=True`, the data transformer re-fit on the training data at each historical forecast step (currently ignored by conformal models). - if `retrain=False`, the data transformer transforms the series once before all the forecasts. The fitted transformer is used to transform the input during both training and prediction. If the transformation is invertible, the forecasts will be inverse-transformed. Only effective when `historical_forecasts=None`. metric_kwargs Additional arguments passed to `metric()`, such as `'n_jobs'` for parallelization, `'component_reduction'` for reducing the component wise metrics, seasonality `'m'` for scaled metrics, etc. Will pass arguments to each metric separately and only if they are present in the corresponding metric signature. Parameter `'insample'` for scaled metrics (e.g. mase`, `rmsse`, ...) is ignored, as it is handled internally. fit_kwargs Currently ignored by conformal models. predict_kwargs Optionally, some additional arguments passed to the model `predict()` method. sample_weight Currently ignored by conformal models. Returns ------- float A single backtest score for single uni/multivariate series, a single `metric` function and: - `historical_forecasts` generated with `last_points_only=True` - `historical_forecasts` generated with `last_points_only=False` and using a backtest `reduction` np.ndarray An numpy array of backtest scores. For single series and one of: - a single `metric` function, `historical_forecasts` generated with `last_points_only=False` and backtest `reduction=None`. The output has shape (n forecasts, *). - multiple `metric` functions and `historical_forecasts` generated with `last_points_only=False`. The output has shape (*, n metrics) when using a backtest `reduction`, and (n forecasts, *, n metrics) when `reduction=None` - multiple uni/multivariate series including `series_reduction` and at least one of `component_reduction=None` or `time_reduction=None` for "per time step metrics" list[float] Same as for type `float` but for a sequence of series. The returned metric list has length `len(series)` with the `float` metric for each input `series`. list[np.ndarray] Same as for type `np.ndarray` but for a sequence of series. The returned metric list has length `len(series)` with the `np.ndarray` metrics for each input `series`. 
""" return super().backtest( series=series, past_covariates=past_covariates, future_covariates=future_covariates, historical_forecasts=historical_forecasts, forecast_horizon=forecast_horizon, num_samples=num_samples, train_length=train_length, start=start, start_format=start_format, stride=stride, retrain=retrain, overlap_end=overlap_end, last_points_only=last_points_only, metric=metric, reduction=reduction, verbose=verbose, show_warnings=show_warnings, predict_likelihood_parameters=predict_likelihood_parameters, enable_optimization=enable_optimization, data_transformers=data_transformers, metric_kwargs=metric_kwargs, fit_kwargs=fit_kwargs, predict_kwargs=predict_kwargs, sample_weight=sample_weight, )
[docs] def residuals( self, series: Union[TimeSeries, Sequence[TimeSeries]], past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, historical_forecasts: Optional[ Union[TimeSeries, Sequence[TimeSeries], Sequence[Sequence[TimeSeries]]] ] = None, forecast_horizon: int = 1, num_samples: int = 1, train_length: Optional[int] = None, start: Optional[Union[pd.Timestamp, int]] = None, start_format: Literal["position", "value"] = "value", stride: int = 1, retrain: Union[bool, int, Callable[..., bool]] = True, overlap_end: bool = False, last_points_only: bool = True, metric: METRIC_TYPE = metrics.err, verbose: bool = False, show_warnings: bool = True, predict_likelihood_parameters: bool = False, enable_optimization: bool = True, data_transformers: Optional[ dict[str, Union[BaseDataTransformer, Pipeline]] ] = None, metric_kwargs: Optional[dict[str, Any]] = None, fit_kwargs: Optional[dict[str, Any]] = None, predict_kwargs: Optional[dict[str, Any]] = None, sample_weight: Optional[Union[TimeSeries, Sequence[TimeSeries], str]] = None, values_only: bool = False, ) -> Union[TimeSeries, list[TimeSeries], list[list[TimeSeries]]]: """Compute the residuals that the model produced for historical forecasts on (potentially multiple) `series`. This function computes the difference (or one of Darts' "per time step" metrics) between the actual observations from `series` and the fitted values obtained by training the model on `series` (or using a pre-trained model with `retrain=False`). Not all models support fitted values, so we use historical forecasts as an approximation for them. In sequence this method performs: - use pre-computed `historical_forecasts` or compute historical forecasts for each series (see :meth:`~darts.models.forecasting.conformal_models.ConformalModel.historical_forecasts` for more details). How the historical forecasts are generated can be configured with parameters `num_samples`, `train_length`, `start`, `start_format`, `forecast_horizon`, `stride`, `retrain`, `last_points_only`, `fit_kwargs`, and `predict_kwargs`. - compute a backtest using a "per time step" `metric` between the historical forecasts and `series` per component/column and time step (see :meth:`~darts.models.forecasting.conformal_models.ConformalModel.backtest` for more details). By default, uses the residuals :func:`~darts.metrics.metrics.err` (error) as a `metric`. - create and return `TimeSeries` (or simply a np.ndarray with `values_only=True`) with the time index from historical forecasts, and values from the metrics per component and time step. This method works for single or multiple univariate or multivariate series. It uses the median prediction (when dealing with stochastic forecasts). Notes ----- Darts has several metrics to evaluate probabilistic forecasts. For conformal models, we recommend using "per time step" quantile interval metrics (see `here <https://unit8co.github.io/darts/generated_api/darts.metrics.html>`_). You can specify which intervals to evaluate by setting `metric_kwargs={'q_interval': my_intervals}`. To check all intervals used by your conformal model `my_model`, you can set ``{'q_interval': my_model.q_interval}``. Parameters ---------- series A (sequence of) target time series used to successively compute the historical forecasts. Will use the past of this series for calibration. The series should not have any overlap with the series used to train the forecasting model. 
past_covariates Optionally, a (sequence of) past-observed covariate time series for every input time series in `series`. Their dimension must match that of the past covariates used for training. Will use this series for calibration. future_covariates Optionally, a (sequence of) future-known covariate time series for every input time series in `series`. Their dimension must match that of the past covariates used for training. Will use this series for calibration. historical_forecasts Optionally, the (or a sequence of / a sequence of sequences of) historical forecasts time series to be evaluated. Corresponds to the output of :meth:`historical_forecasts() <darts.models.forecasting.conformal_models.ConformalModel.historical_forecasts>`. The same `series` and `last_points_only` values must be passed that were used to generate the historical forecasts. If provided, will skip historical forecasting and ignore all parameters except `series`, `last_points_only`, `metric`, and `reduction`. forecast_horizon The forecast horizon for the predictions. num_samples Number of times a prediction is sampled from the calibrated quantile predictions using linear interpolation in-between the quantiles. For larger values, the sample distribution approximates the calibrated quantile predictions. train_length Currently ignored by conformal models. start Optionally, the first point in time at which a prediction is computed. This parameter supports: ``int``, ``pandas.Timestamp``, and ``None``. If an ``int``, it is either the index position of the first prediction point for `series` with a `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to the index position with `start_format="position"`. If a ``pandas.Timestamp``, it is the time stamp of the first prediction point. If ``None``, the first prediction point will automatically be set to: - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first predictable point is earlier than the first trainable point. - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`), or `retrain` is a ``Callable`` and the first trainable point is earlier than the first predictable point. - the first trainable point (given `train_length`) otherwise Note: If the model uses a shifted output (`output_chunk_shift > 0`), then the first predicted point is also shifted by `output_chunk_shift` points into the future. Note: Raises a ValueError if `start` yields a time outside the time index of `series`. Note: If `start` is outside the possible historical forecasting times, will ignore the parameter (default behavior with ``None``) and start at the first trainable/predictable point. start_format Defines the `start` format. If set to ``'position'``, `start` corresponds to the index position of the first predicted point and can range from `(-len(series), len(series) - 1)`. If set to ``'value'``, `start` corresponds to the index value/label of the first predicted point. Will raise an error if the value is not in `series`' index. Default: ``'value'``. stride The number of time steps between two consecutive predictions. retrain Currently ignored by conformal models. overlap_end Whether the returned forecasts can go beyond the series' end or not. last_points_only Whether to return only the last point of each historical forecast. If set to ``True``, the method returns a single ``TimeSeries`` (for each time series in `series`) containing the successive point forecasts. 
Otherwise, returns a list of historical ``TimeSeries`` forecasts. metric Either one of Darts' "per time step" metrics (see `here <https://unit8co.github.io/darts/generated_api/darts.metrics.html>`_), or a custom metric that has an identical signature as Darts' "per time step" metrics, uses decorators :func:`~darts.metrics.metrics.multi_ts_support` and :func:`~darts.metrics.metrics.multi_ts_support`, and returns one value per time step. verbose Whether to print the progress. show_warnings Whether to show warnings related to historical forecasts optimization, or parameters `start` and `train_length`. predict_likelihood_parameters If set to `True`, generates the quantile predictions directly. Only supported with `num_samples = 1`. enable_optimization Whether to use the optimized version of `historical_forecasts` when supported and available. Default: ``True``. data_transformers Optionally, a dictionary of `BaseDataTransformer` or `Pipeline` to apply to the corresponding series (possibles keys; "series", "past_covariates", "future_covariates"). If provided, all input series must be in the un-transformed space. For fittable transformer / pipeline: - if `retrain=True`, the data transformer re-fit on the training data at each historical forecast step (currently ignored by conformal models). - if `retrain=False`, the data transformer transforms the series once before all the forecasts. The fitted transformer is used to transform the input during both training and prediction. If the transformation is invertible, the forecasts will be inverse-transformed. Only effective when `historical_forecasts=None`. metric_kwargs Additional arguments passed to `metric()`, such as `'n_jobs'` for parallelization, `'m'` for scaled metrics, etc. Will pass arguments only if they are present in the corresponding metric signature. Ignores reduction arguments `"series_reduction", "component_reduction", "time_reduction"`, and parameter `'insample'` for scaled metrics (e.g. mase`, `rmsse`, ...), as they are handled internally. fit_kwargs Currently ignored by conformal models. predict_kwargs Optionally, some additional arguments passed to the model `predict()` method. sample_weight Currently ignored by conformal models. values_only Whether to return the residuals as `np.ndarray`. If `False`, returns residuals as `TimeSeries`. Returns ------- TimeSeries Residual `TimeSeries` for a single `series` and `historical_forecasts` generated with `last_points_only=True`. list[TimeSeries] A list of residual `TimeSeries` for a sequence (list) of `series` with `last_points_only=True`. The residual list has length `len(series)`. list[list[TimeSeries]] A list of lists of residual `TimeSeries` for a sequence of `series` with `last_points_only=False`. The outer residual list has length `len(series)`. The inner lists consist of the residuals from all possible series-specific historical forecasts. 
""" return super().residuals( series=series, past_covariates=past_covariates, future_covariates=future_covariates, historical_forecasts=historical_forecasts, forecast_horizon=forecast_horizon, num_samples=num_samples, train_length=train_length, start=start, start_format=start_format, stride=stride, retrain=retrain, overlap_end=overlap_end, last_points_only=last_points_only, metric=metric, verbose=verbose, show_warnings=show_warnings, predict_likelihood_parameters=predict_likelihood_parameters, enable_optimization=enable_optimization, data_transformers=data_transformers, metric_kwargs=metric_kwargs, fit_kwargs=fit_kwargs, predict_kwargs=predict_kwargs, sample_weight=sample_weight, values_only=values_only, )
@random_method def _calibrate_forecasts( self, series: Sequence[TimeSeries], forecasts: Union[Sequence[Sequence[TimeSeries]], Sequence[TimeSeries]], num_samples: int = 1, start: Optional[Union[pd.Timestamp, int, str]] = None, start_format: Literal["position", "value"] = "value", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, last_points_only: bool = True, verbose: bool = False, show_warnings: bool = True, predict_likelihood_parameters: bool = False, ) -> Union[TimeSeries, list[TimeSeries], list[list[TimeSeries]]]: """Generate calibrated historical forecasts. In general the workflow of the models to produce one calibrated forecast/prediction per step in the horizon is as follows: - Generate historical forecasts for `series` with stride `cal_stride` (using the forecasting model) - Extract a calibration set: The forecasts from the most recent past to use as calibration for one conformal prediction. The number of examples to use can be defined at model creation with parameter `cal_length`. It automatically extracts the calibration set from the most recent past of your input series (`series`, `past_covariates`, ...). - Compute the errors/non-conformity scores (specific to each conformal model) on these historical forecasts - Compute the quantile values from the errors / non-conformity scores (using our desired quantiles set at model creation with parameter `quantiles`). - Compute the conformal prediction: Using these quantile values, add calibrated intervals to (or adjust the existing intervals of) the forecasting model's predictions. """ cal_stride = self.cal_stride cal_length = self.cal_length metric, metric_kwargs = self._residuals_metric residuals = self.model.residuals( series=series, historical_forecasts=forecasts, overlap_end=overlap_end, last_points_only=last_points_only, verbose=verbose, show_warnings=show_warnings, values_only=True, metric=metric, metric_kwargs=metric_kwargs, ) outer_iterator = enumerate(zip(series, forecasts, residuals)) if len(series) > 1: # Use tqdm on the outer loop only if there's more than one series to iterate over # (otherwise use tqdm on the inner loop). 
outer_iterator = _build_tqdm_iterator( outer_iterator, verbose, total=len(series), desc="conformal forecasts", ) cp_hfcs = [] for series_idx, (series_, s_hfcs, res) in outer_iterator: cp_preds = [] # no historical forecasts were generated if not s_hfcs: cp_hfcs.append(cp_preds) continue last_hfc = s_hfcs if last_points_only else s_hfcs[-1] # compute the minimum required number of useful calibration residuals # at least one or `cal_length` examples min_n_cal = cal_length or 1 # `last_points_only=False` requires additional examples to use most recent information # from all steps in the horizon if not last_points_only: min_n_cal += math.ceil(forecast_horizon / cal_stride) - 1 # determine first forecast index for conformal prediction # we need at least one residual per point in the horizon prior to the first conformal forecast horizon_ocs = forecast_horizon + self.output_chunk_shift first_idx_train = math.ceil(horizon_ocs / cal_stride) # plus some additional examples based on `cal_length` if cal_length is not None: first_idx_train += cal_length - 1 # check if later we need to drop some residuals without useful information (unknown residuals) if overlap_end: delta_end = n_steps_between( end=last_hfc.end_time(), start=series_.end_time(), freq=series_.freq, ) else: delta_end = 0 # ignore residuals without useful information if last_points_only and delta_end > 0: # useful residual information only up until the forecast *ending* at the last time step in `series` ignore_n_residuals = delta_end elif not last_points_only and delta_end >= forecast_horizon: # useful residual information only up until the forecast *starting* at the last time step in `series` ignore_n_residuals = delta_end - forecast_horizon + 1 else: # ignore at least one forecast residuals from the end, since we can only use prior residuals ignore_n_residuals = self.output_chunk_shift + 1 # with last points only, ignore the last `horizon` residuals to avoid look-ahead bias if last_points_only: ignore_n_residuals += forecast_horizon - 1 # get the last index respecting `cal_stride` last_res_idx = -math.ceil(ignore_n_residuals / cal_stride) # get only useful residuals res = res[:last_res_idx] if first_idx_train >= len(s_hfcs) or len(res) < min_n_cal: raise_log( ValueError( "Could not build the minimum required calibration input with the provided " f"`series` and `*_covariates` at series index: {series_idx}. " f"Expected to generate at least `{min_n_cal}` calibration forecasts with known residuals " f"before the first conformal forecast, but could only generate `{len(res)}`." 
), logger=logger, ) # adjust first index based on `start` first_idx_start = 0 if start == "end": # called from `predict()`; start at the last forecast first_idx_start = len(s_hfcs) - 1 elif start is not None: # called from `historical_forecasts()`: use user-defined start # the conformal forecastable index ranges from the start of the first valid historical # forecast until the start of the last historical forecast historical_forecasts_time_index = ( s_hfcs[first_idx_train].start_time(), s_hfcs[-1].start_time(), ) # adjust forecast start points in case of output shift or `last_points_only=True` adjust_idx = ( self.output_chunk_shift + int(last_points_only) * (forecast_horizon - 1) ) * series_.freq historical_forecasts_time_index = ( historical_forecasts_time_index[0] - adjust_idx, historical_forecasts_time_index[1] - adjust_idx, ) # adjust forecastable times based on user start, assuming hfcs were generated with `stride=1` first_start_time, _ = _adjust_historical_forecasts_time_index( series=series_, series_idx=series_idx, start=start, start_format=start_format, stride=stride, historical_forecasts_time_index=historical_forecasts_time_index, show_warnings=show_warnings, ) # find position relative to start first_idx_start = n_steps_between( first_start_time + adjust_idx, s_hfcs[0].start_time(), freq=series_.freq, ) # adjust by stride first_idx_start = math.ceil(first_idx_start / cal_stride) # get final first index first_fc_idx = max([first_idx_train, first_idx_start]) # bring `res` from shape (forecasting steps, n components, n past residuals) into # shape (forecasting steps, n components, n past residuals) if last_points_only: # -> (1, n components, n samples * n past residuals) res = res.transpose(2, 1, 0) else: # rearrange the residuals to avoid look-ahead bias and to have the same number of examples per # point in the horizon. We want the most recent residuals in the past for each step in the horizon. res = np.array(res) # go through each step in the horizon, use all useful information from the end (most recent values), # and skip information at beginning (most distant past); # -> (forecast horizon, n components, n past residuals) res_ = [] for idx_horizon in range(forecast_horizon): n = idx_horizon + 1 # ignore residuals at beginning idx_fc_start = math.floor((forecast_horizon - n) / cal_stride) # keep as many residuals as possible from end idx_fc_end = -( math.ceil(forecast_horizon / cal_stride) - (idx_fc_start + 1) ) res_.append(res[idx_fc_start : idx_fc_end or None, idx_horizon]) res = np.concatenate(res_, axis=2).T # get the last conformal forecast index (exclusive) based on the residual examples last_fc_idx = res.shape[2] + math.ceil(horizon_ocs / cal_stride) # forecasts are stridden, so stride must be relative rel_stride = math.ceil(stride / cal_stride) def conformal_predict(idx_, pred_vals_): # get the last residual index for calibration, `cal_end` is exclusive # to avoid look-ahead bias, use only residuals from before the conformal forecast start point; # for `last_points_only=True`, the last residual historically available at the forecasting # point is `horizon_ocs - 1` steps before. 
The same applies to `last_points_only=False` thanks to # the residual rearrangement cal_end = ( first_fc_idx + idx_ * rel_stride - (math.ceil(horizon_ocs / cal_stride) - 1) ) # optionally, use only `cal_length` residuals cal_start = cal_end - cal_length if cal_length is not None else None # calibrate and apply interval to the forecasts q_hat_ = self._calibrate_interval(res[:, :, cal_start:cal_end]) vals = self._apply_interval(pred_vals_, q_hat_) # optionally, generate samples from the intervals if not predict_likelihood_parameters: vals = sample_from_quantiles( vals, self.quantiles, num_samples=num_samples ) return vals # historical conformal prediction # for each forecast, compute calibrated quantile intervals based on past residuals if last_points_only: inner_iterator = enumerate( s_hfcs.all_values(copy=False)[first_fc_idx:last_fc_idx:rel_stride] ) else: inner_iterator = enumerate(s_hfcs[first_fc_idx:last_fc_idx:rel_stride]) comp_names_out = ( self.likelihood.component_names(series_) if predict_likelihood_parameters else None ) if len(series) == 1: # only use progress bar if there's no outer loop inner_iterator = _build_tqdm_iterator( inner_iterator, verbose, total=(last_fc_idx - 1 - first_fc_idx) // rel_stride + 1, desc="conformal forecasts", ) if last_points_only: for idx, pred_vals in inner_iterator: pred_vals = np.expand_dims(pred_vals, 0) cp_pred = conformal_predict(idx, pred_vals) cp_preds.append(cp_pred) cp_preds = _build_forecast_series( points_preds=np.concatenate(cp_preds, axis=0), input_series=series_, custom_columns=comp_names_out, time_index=generate_index( start=s_hfcs._time_index[first_fc_idx], length=len(cp_preds), freq=series_.freq * stride, name=series_._time_index.name, ), with_static_covs=not predict_likelihood_parameters, with_hierarchy=False, ) else: for idx, pred in inner_iterator: pred_vals = pred.all_values(copy=False) cp_pred = conformal_predict(idx, pred_vals) cp_pred = _build_forecast_series( points_preds=cp_pred, input_series=series_, custom_columns=comp_names_out, time_index=pred._time_index, with_static_covs=not predict_likelihood_parameters, with_hierarchy=False, ) cp_preds.append(cp_pred) cp_hfcs.append(cp_preds) return cp_hfcs def _clean(self) -> Self: """Cleans the model and sub-model.""" cleaned_model = super()._clean() cleaned_model.model = cleaned_model.model._clean() return cleaned_model
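# --- Conceptual sketch (editor's addition, not the internal implementation) ---
# For the naive, symmetric case, `_calibrate_interval()` essentially takes an empirical
# quantile of the past absolute errors (non-conformity scores), and `_apply_interval()`
# adds/subtracts it from the model forecast. All names and values below are illustrative.
import numpy as np

rng = np.random.default_rng(42)
abs_errors = np.abs(rng.normal(size=100))  # past non-conformity scores for one horizon step
interval_range = 0.8                       # coverage of the (0.1, 0.9) quantile interval

q_hat = np.quantile(abs_errors, interval_range)  # calibrated interval half-width
forecast = 10.0                                  # the forecasting model's point prediction
lower, upper = forecast - q_hat, forecast + q_hat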
[docs]    def save(
        self,
        path: Optional[Union[str, os.PathLike, BinaryIO]] = None,
        clean: bool = False,
        **pkl_kwargs,
    ) -> None:
        """
        Saves the conformal model under a given path or file handle.

        Additionally, two files are stored if `self.model` is a `TorchForecastingModel`.

        Example for saving and loading a :class:`ConformalNaiveModel`:

            .. highlight:: python
            .. code-block:: python

                from darts.datasets import AirPassengersDataset
                from darts.models import ConformalNaiveModel, LinearRegressionModel

                series = AirPassengersDataset().load()
                forecasting_model = LinearRegressionModel(lags=4).fit(series)

                model = ConformalNaiveModel(
                    model=forecasting_model,
                    quantiles=[0.1, 0.5, 0.9],
                )

                model.save("my_model.pkl")
                model_loaded = ConformalNaiveModel.load("my_model.pkl")
            ..

        Parameters
        ----------
        path
            Path or file handle under which to save the conformal model at its current state. If no path is
            specified, the conformal model is automatically saved under
            ``"{ConformalNaiveModel}_{YYYY-mm-dd_HH_MM_SS}.pkl"``. If the forecasting model is a
            `TorchForecastingModel`, two files (model object and checkpoint) are saved under
            ``"{path}.{ModelClass}.pt"`` and ``"{path}.{ModelClass}.ckpt"``.
        clean
            Whether to store a cleaned version of the model. If `True`, the training series and covariates are
            removed. If the underlying forecasting `model` is a `TorchForecastingModel`, this will additionally
            remove all Lightning Trainer-related parameters.

            Note: After loading a model stored with `clean=True`, a `series` must be passed to `predict()`,
            `historical_forecasts()` and other forecasting methods.
        pkl_kwargs
            Keyword arguments passed to `pickle.dump()`
        """
        if path is None:
            # default path
            path = self._default_save_path() + ".pkl"

        super().save(path, clean=clean, **pkl_kwargs)

        if TORCH_AVAILABLE and issubclass(type(self.model), TorchForecastingModel):
            path_tfm = f"{path}.{type(self.model).__name__}.pt"
            self.model.save(path=path_tfm, clean=clean)
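As a complement to the example in the docstring above, the following sketch shows the effect of `clean=True`; the file name and hyperparameters are arbitrary.

.. code-block:: python

    from darts.datasets import AirPassengersDataset
    from darts.models import ConformalNaiveModel, LinearRegressionModel

    series = AirPassengersDataset().load()
    model = ConformalNaiveModel(
        model=LinearRegressionModel(lags=4).fit(series),
        quantiles=[0.1, 0.5, 0.9],
    )

    # store a cleaned version: the training series and covariates are dropped
    model.save("my_model_clean.pkl", clean=True)

    # after loading a cleaned model, a `series` must be passed to the forecasting methods
    loaded = ConformalNaiveModel.load("my_model_clean.pkl")
    pred = loaded.predict(n=6, series=series)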
[docs]    @staticmethod
    def load(
        path: Union[str, os.PathLike, BinaryIO],
        pl_trainer_kwargs: Optional[dict] = None,
        **kwargs,
    ) -> "ConformalModel":
        """
        Loads a model from a given path or file handle.

        Parameters
        ----------
        path
            Path or file handle from which to load the model.
        pl_trainer_kwargs
            Only effective if the underlying forecasting model is a `TorchForecastingModel`.
            Optionally, a set of kwargs to create a new Lightning Trainer used to configure the model for
            downstream tasks (e.g. prediction). Some examples include specifying the batch size or moving
            the model to CPU/GPU(s). Check the `Lightning Trainer documentation
            <https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html>`_ for more information
            about the supported kwargs.
        **kwargs
            Only effective if the underlying forecasting model is a `TorchForecastingModel`.
            Additional kwargs for PyTorch Lightning's :func:`LightningModule.load_from_checkpoint()` method.
            For more information, read the `official documentation <https://pytorch-lightning.readthedocs.io/en/stable/
            common/lightning_module.html#load-from-checkpoint>`_.
        """
        model: ConformalModel = GlobalForecastingModel.load(path)

        if TORCH_AVAILABLE and issubclass(type(model.model), TorchForecastingModel):
            path_tfm = f"{path}.{type(model.model).__name__}.pt"
            model.model = TorchForecastingModel.load(
                path_tfm,
                pl_trainer_kwargs=pl_trainer_kwargs,
                **kwargs,
            )
        return model
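If the underlying forecasting model is a `TorchForecastingModel`, a new Lightning Trainer configuration can be passed at load time. The path below is hypothetical and purely illustrative.

.. code-block:: python

    from darts.models import ConformalNaiveModel

    # `pl_trainer_kwargs` and `**kwargs` only take effect for torch-based underlying models
    model = ConformalNaiveModel.load(
        "my_torch_conformal.pkl",
        pl_trainer_kwargs={"accelerator": "cpu"},
    )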
@abstractmethod def _calibrate_interval( self, residuals: np.ndarray ) -> tuple[np.ndarray, np.ndarray]: """Computes the lower and upper calibrated forecast intervals based on residuals. Parameters ---------- residuals The residuals are expected to have shape (horizon, n components, n historical forecasts * n samples) """ @abstractmethod def _apply_interval(self, pred: np.ndarray, q_hat: tuple[np.ndarray, np.ndarray]): """Applies the calibrated interval to the predicted quantiles. Returns an array with `len(quantiles)` conformalized quantile predictions (lower quantiles, model forecast, upper quantiles) per component. E.g. output is `(target1_q1, target1_pred, target1_q2, target2_q1, ...)` """ @property @abstractmethod def _residuals_metric(self) -> tuple[METRIC_TYPE, Optional[dict]]: """Gives the "per time step" metric and optional metric kwargs used to compute residuals / non-conformity scores.""" def _historical_forecasts_sanity_checks(self, *args: Any, **kwargs: Any) -> None: super()._historical_forecasts_sanity_checks(*args, **kwargs, is_conformal=True) @property def output_chunk_length(self) -> Optional[int]: # conformal models can predict any horizon if the calibration set is large enough return None @property def output_chunk_shift(self) -> int: return self.model.output_chunk_shift @property def _model_encoder_settings(self): raise NotImplementedError(f"not supported by `{self.__class__.__name__}`.") @property def extreme_lags( self, ) -> tuple[ Optional[int], Optional[int], Optional[int], Optional[int], Optional[int], Optional[int], int, Optional[int], ]: raise NotImplementedError(f"not supported by `{self.__class__.__name__}`.") @property def min_train_series_length(self) -> int: raise NotImplementedError(f"not supported by `{self.__class__.__name__}`.") @property def min_train_samples(self) -> int: raise NotImplementedError(f"not supported by `{self.__class__.__name__}`.") @property def supports_multivariate(self) -> bool: return self.model.supports_multivariate @property def supports_past_covariates(self) -> bool: return self.model.supports_past_covariates @property def supports_future_covariates(self) -> bool: return self.model.supports_future_covariates @property def supports_static_covariates(self) -> bool: return self.model.supports_static_covariates @property def supports_sample_weight(self) -> bool: return self.model.supports_sample_weight @property def supports_likelihood_parameter_prediction(self) -> bool: return True @property def supports_probabilistic_prediction(self) -> bool: return True @property def uses_past_covariates(self) -> bool: return self.model.uses_past_covariates @property def uses_future_covariates(self) -> bool: return self.model.uses_future_covariates @property def uses_static_covariates(self) -> bool: return self.model.uses_static_covariates @property def considers_static_covariates(self) -> bool: return self.model.considers_static_covariates @property def likelihood(self) -> Likelihood: return self._likelihood
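The abstract members above define the contract a concrete conformal model has to fulfil: compute calibrated interval bounds from past non-conformity scores, apply them to a forecast, and name the metric that produces the scores. The condensed sketch below mirrors the symmetric branch of :class:`ConformalNaiveModel` (defined next); it illustrates the interface and is not an officially supported extension point.

.. code-block:: python

    import numpy as np

    from darts import metrics


    class MyAbsErrorConformalModel(ConformalModel):
        """Illustrative subclass: symmetric intervals from absolute-error non-conformity scores."""

        def _calibrate_interval(self, residuals):
            # residuals: (horizon, n components, n past residuals)
            q_hat = np.quantile(
                residuals, q=self.interval_range_sym, method="higher", axis=2
            ).transpose((1, 2, 0))
            return -q_hat, q_hat[:, :, ::-1]

        def _apply_interval(self, pred, q_hat):
            # reduce stochastic predictions to their median, then shift by the calibrated quantile values
            if pred.shape[2] != 1:
                pred = np.expand_dims(np.quantile(pred, 0.5, axis=2), -1)
            pred = np.concatenate([pred + q_hat[0], pred, pred + q_hat[1]], axis=2)
            return pred.reshape(len(pred), -1)

        @property
        def _residuals_metric(self):
            return metrics.ae, None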
[docs]class ConformalNaiveModel(ConformalModel):
    def __init__(
        self,
        model: GlobalForecastingModel,
        quantiles: list[float],
        symmetric: bool = True,
        cal_length: Optional[int] = None,
        cal_stride: int = 1,
        cal_num_samples: int = 500,
        random_state: Optional[int] = None,
    ):
        """Naive Conformal Prediction Model.

        A probabilistic model that adds calibrated intervals around the median forecast from a pre-trained
        global forecasting model. It does not have to be trained and can generate calibrated forecasts
        directly using the underlying trained forecasting model. It supports two symmetry modes:

        - `symmetric=True`:
            - The lower and upper interval bounds are calibrated with the same magnitude.
            - Non-conformity scores: uses metric `ae()` (see absolute error :func:`~darts.metrics.metrics.ae`)
              to compute the non-conformity scores on the calibration set.
        - `symmetric=False`
            - The lower and upper interval bounds are calibrated separately.
            - Non-conformity scores: uses metric `err()` (see error :func:`~darts.metrics.metrics.err`) to
              compute the non-conformity scores on the calibration set for the upper bounds, and `-err()`
              for the lower bounds.

        Since it is a probabilistic model, you can generate forecasts in two ways (when calling `predict()`,
        `historical_forecasts()`, ...):

        - Predict the calibrated quantile intervals directly: Pass parameters
          `predict_likelihood_parameters=True`, and `num_samples=1` to the forecast method.
        - Predict stochastic samples from the calibrated quantile intervals: Pass parameters
          `predict_likelihood_parameters=False`, and `num_samples>>1` to the forecast method.

        Conformal models can be applied to any of Darts' global forecasting models, as long as the model has
        been fitted before. In general the workflow of the models to produce one calibrated forecast/prediction
        is as follows:

        - Extract a calibration set: The calibration set for each conformal forecast is automatically extracted
          from the most recent past of your input series relative to the forecast start point. The number of
          calibration examples (forecast errors / non-conformity scores) to consider can be defined at model
          creation with parameter `cal_length`. Note that when using `cal_stride>1`, a longer history is
          required since the calibration examples are generated with stridden historical forecasts.
        - Generate historical forecasts on the calibration set (using the forecasting model) with a stride
          `cal_stride`.
        - Compute the errors/non-conformity scores (as defined above) on these historical forecasts
        - Compute the quantile values from the errors / non-conformity scores (using our desired quantiles set
          at model creation with parameter `quantiles`).
        - Compute the conformal prediction: Using these quantile values, add calibrated intervals to the
          forecasting model's predictions.

        Some notes:

        - When computing `historical_forecasts()`, `backtest()`, `residuals()`, ... the above is applied for
          each forecast (the forecasting model's historical forecasts are only generated once for efficiency).
        - For multi-horizon forecasts, the above is applied for each step in the horizon separately.

        Parameters
        ----------
        model
            A pre-trained global forecasting model. See the list of models
            `here <https://unit8co.github.io/darts/#forecasting-models>`_.
        quantiles
            A list of quantiles centered around the median `q=0.5` to use. For example quantiles
            [0.1, 0.2, 0.5, 0.8, 0.9] correspond to two intervals with (0.9 - 0.1) = 80%, and
            (0.8 - 0.2) = 60% coverage around the median (model forecast).
        symmetric
            Whether to use symmetric non-conformity scores.
            If `True`, uses metric `ae()` (see :func:`~darts.metrics.metrics.ae`) to compute the non-conformity
            scores. If `False`, uses metric `-err()` (see :func:`~darts.metrics.metrics.err`) for the lower, and
            `err()` for the upper quantile interval bound.
        cal_length
            The number of past forecast errors / non-conformity scores to use as calibration for each
            conformal forecast (and each step in the horizon). If `None`, considers all scores.
        cal_stride
            The stride to apply when computing the historical forecasts and non-conformity scores on the
            calibration set. The actual conformal forecasts can have a different stride given with parameter
            `stride` in downstream tasks (e.g. historical forecasts, backtest, ...)
        cal_num_samples
            The number of samples to generate for each calibration forecast (if `model` is a probabilistic
            forecasting model). The non-conformity scores are computed on the quantile values of these
            forecasts (using quantiles `quantiles`). Uses `1` for deterministic models. The actual conformal
            forecasts can have a different number of samples given with parameter `num_samples` in downstream
            tasks (e.g. predict, historical forecasts, ...).
        random_state
            Control the randomness of probabilistic conformal forecasts (sample generation) across different
            runs.
        """
        super().__init__(
            model=model,
            quantiles=quantiles,
            symmetric=symmetric,
            cal_length=cal_length,
            cal_num_samples=cal_num_samples,
            random_state=random_state,
            cal_stride=cal_stride,
        )

    def _calibrate_interval(
        self, residuals: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        def q_hat_from_residuals(residuals_):
            # compute quantiles of shape (forecast horizon, n components, n quantile intervals)
            return np.quantile(
                residuals_,
                q=self.interval_range_sym,
                method="higher",
                axis=2,
            ).transpose((1, 2, 0))

        # residuals shape (horizon, n components, n past forecasts)
        if self.symmetric:
            # symmetric (from metric `ae()`)
            q_hat = q_hat_from_residuals(residuals)
            return -q_hat, q_hat[:, :, ::-1]
        else:
            # asymmetric (from metric `err()`)
            q_hat = q_hat_from_residuals(
                np.concatenate([-residuals, residuals], axis=1)
            )
            n_comps = residuals.shape[1]
            return -q_hat[:, :n_comps, :], q_hat[:, n_comps:, ::-1]

    def _apply_interval(self, pred: np.ndarray, q_hat: tuple[np.ndarray, np.ndarray]):
        # convert stochastic predictions to median
        if pred.shape[2] != 1:
            pred = np.expand_dims(np.quantile(pred, 0.5, axis=2), -1)
        # shape (forecast horizon, n components, n quantiles)
        pred = np.concatenate([pred + q_hat[0], pred, pred + q_hat[1]], axis=2)
        # -> (forecast horizon, n components * n quantiles)
        return pred.reshape(len(pred), -1)

    @property
    def _residuals_metric(self) -> tuple[METRIC_TYPE, Optional[dict]]:
        return (metrics.ae if self.symmetric else metrics.err), None
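A minimal usage sketch for :class:`ConformalNaiveModel`; the dataset, lags and `cal_length` are arbitrary choices.

.. code-block:: python

    from darts.datasets import AirPassengersDataset
    from darts.models import ConformalNaiveModel, LinearRegressionModel

    series = AirPassengersDataset().load()
    train, _ = series.split_before(0.8)

    # any pre-trained global forecasting model can serve as the base model
    base_model = LinearRegressionModel(lags=12).fit(train)
    model = ConformalNaiveModel(model=base_model, quantiles=[0.1, 0.5, 0.9], cal_length=36)

    # calibrated quantile values directly (one "sample" holding the likelihood parameters)
    quantile_pred = model.predict(
        n=12, series=train, predict_likelihood_parameters=True, num_samples=1
    )

    # or stochastic samples drawn from the calibrated intervals
    sample_pred = model.predict(n=12, series=train, num_samples=500)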
[docs]class ConformalQRModel(ConformalModel):
    def __init__(
        self,
        model: GlobalForecastingModel,
        quantiles: list[float],
        symmetric: bool = True,
        cal_length: Optional[int] = None,
        cal_stride: int = 1,
        cal_num_samples: int = 500,
        random_state: Optional[int] = None,
    ):
        """Conformalized Quantile Regression Model.

        A probabilistic model that calibrates the quantile predictions from a pre-trained probabilistic global
        forecasting model. It does not have to be trained and can generate calibrated forecasts directly using
        the underlying trained forecasting model. It supports two symmetry modes:

        - `symmetric=True`:
            - The lower and upper quantile predictions are calibrated with the same magnitude.
            - Non-conformity scores: uses metric `incs_qr(symmetric=True)` (see Non-Conformity Score for
              Quantile Regression :func:`~darts.metrics.metrics.incs_qr`) to compute the non-conformity scores
              on the calibration set.
        - `symmetric=False`
            - The lower and upper quantile predictions are calibrated separately.
            - Non-conformity scores: uses metric `incs_qr(symmetric=False)` (see Non-Conformity Score for
              Quantile Regression :func:`~darts.metrics.metrics.incs_qr`) to compute the non-conformity scores
              for the upper and lower bound separately.

        Since it is a probabilistic model, you can generate forecasts in two ways (when calling `predict()`,
        `historical_forecasts()`, ...):

        - Predict the calibrated quantile intervals directly: Pass parameters
          `predict_likelihood_parameters=True`, and `num_samples=1` to the forecast method.
        - Predict stochastic samples from the calibrated quantile intervals: Pass parameters
          `predict_likelihood_parameters=False`, and `num_samples>>1` to the forecast method.

        Conformal models can be applied to any of Darts' global forecasting models, as long as the model has
        been fitted before. In general the workflow of the models to produce one calibrated forecast/prediction
        is as follows:

        - Extract a calibration set: The calibration set for each conformal forecast is automatically extracted
          from the most recent past of your input series relative to the forecast start point. The number of
          calibration examples (forecast errors / non-conformity scores) to consider can be defined at model
          creation with parameter `cal_length`. Note that when using `cal_stride>1`, a longer history is
          required since the calibration examples are generated with stridden historical forecasts.
        - Generate historical forecasts (quantile predictions) on the calibration set (using the forecasting
          model) with a stride `cal_stride`.
        - Compute the errors/non-conformity scores (as defined above) on these historical quantile predictions
        - Compute the quantile values from the errors / non-conformity scores (using our desired quantiles set
          at model creation with parameter `quantiles`).
        - Compute the conformal prediction: Using these quantile values, calibrate the predicted quantiles
          from the forecasting model's predictions.

        Some notes:

        - When computing `historical_forecasts()`, `backtest()`, `residuals()`, ... the above is applied for
          each forecast (the forecasting model's historical forecasts are only generated once for efficiency).
        - For multi-horizon forecasts, the above is applied for each step in the horizon separately.

        Parameters
        ----------
        model
            A pre-trained global forecasting model. See the list of models
            `here <https://unit8co.github.io/darts/#forecasting-models>`_.
        quantiles
            A list of quantiles centered around the median `q=0.5` to use.
            For example quantiles [0.1, 0.2, 0.5, 0.8, 0.9] correspond to two intervals with
            (0.9 - 0.1) = 80%, and (0.8 - 0.2) = 60% coverage around the median (model forecast).
        symmetric
            Whether to use symmetric non-conformity scores. If `True`, uses symmetric metric
            `incs_qr(..., symmetric=True)` (see :func:`~darts.metrics.metrics.incs_qr`) to compute the
            non-conformity scores. If `False`, uses asymmetric metric `incs_qr(..., symmetric=False)` with
            individual scores for the lower- and upper quantile interval bounds.
        cal_length
            The number of past forecast errors / non-conformity scores to use as calibration for each
            conformal forecast (and each step in the horizon). If `None`, considers all scores.
        cal_stride
            The stride to apply when computing the historical forecasts and non-conformity scores on the
            calibration set. The actual conformal forecasts can have a different stride given with parameter
            `stride` in downstream tasks (e.g. historical forecasts, backtest, ...)
        cal_num_samples
            The number of samples to generate for each calibration forecast (if `model` is a probabilistic
            forecasting model). The non-conformity scores are computed on the quantile values of these
            forecasts (using quantiles `quantiles`). Uses `1` for deterministic models. The actual conformal
            forecasts can have a different number of samples given with parameter `num_samples` in downstream
            tasks (e.g. predict, historical forecasts, ...).
        random_state
            Control the randomness of probabilistic conformal forecasts (sample generation) across different
            runs.
        """
        if not model.supports_probabilistic_prediction:
            raise_log(
                ValueError(
                    "`model` must support probabilistic forecasting. Consider using a `likelihood` at "
                    "forecasting model creation, or use another conformal model."
                ),
                logger=logger,
            )
        super().__init__(
            model=model,
            quantiles=quantiles,
            symmetric=symmetric,
            cal_length=cal_length,
            cal_num_samples=cal_num_samples,
            random_state=random_state,
            cal_stride=cal_stride,
        )

    def _calibrate_interval(
        self, residuals: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        n_comps = residuals.shape[1] // (
            len(self.interval_range) * (1 + int(not self.symmetric))
        )
        n_intervals = len(self.interval_range)

        def q_hat_from_residuals(residuals_):
            # TODO: is there a more efficient way?
            # compute quantiles with shape (horizon, n components, n quantile intervals)
            # over all past residuals
            q_hat_tmp = np.quantile(
                residuals_, q=self.interval_range_sym, method="higher", axis=2
            ).transpose((1, 2, 0))
            q_hat_ = np.empty((len(residuals_), n_comps, n_intervals))
            for i in range(n_intervals):
                for c in range(n_comps):
                    q_hat_[:, c, i] = q_hat_tmp[:, i + c * n_intervals, i]
            return q_hat_

        if self.symmetric:
            # symmetric has one nc-score per interval (from metric `incs_qr(symmetric=True)`)
            # residuals shape (horizon, n components * n intervals, n past forecasts)
            q_hat = q_hat_from_residuals(residuals)
            return -q_hat, q_hat[:, :, ::-1]
        else:
            # asymmetric has two nc-scores per interval (for lower and upper quantiles, from metric
            # `incs_qr(symmetric=False)`)
            # lower and upper residuals are concatenated along axis=1;
            # residuals shape (horizon, n components * n intervals * 2, n past forecasts)
            half_idx = residuals.shape[1] // 2
            q_hat_lo = q_hat_from_residuals(residuals[:, :half_idx])
            q_hat_hi = q_hat_from_residuals(residuals[:, half_idx:])
            return -q_hat_lo, q_hat_hi[:, :, ::-1]

    def _apply_interval(self, pred: np.ndarray, q_hat: tuple[np.ndarray, np.ndarray]):
        # get quantile predictions with shape (n times, n components, n quantiles)
        pred = np.quantile(pred, self.quantiles, axis=2).transpose((1, 2, 0))
        # shape (forecast horizon, n components, n quantiles)
        pred = np.concatenate(
            [
                pred[:, :, : self.idx_median] + q_hat[0],  # lower quantiles
                pred[:, :, self.idx_median : self.idx_median + 1],  # model forecast
                pred[:, :, self.idx_median + 1 :] + q_hat[1],  # upper quantiles
            ],
            axis=2,
        )
        # -> (forecast horizon, n components * n quantiles)
        return pred.reshape(len(pred), -1)

    @property
    def _residuals_metric(self) -> tuple[METRIC_TYPE, Optional[dict]]:
        return metrics.incs_qr, {
            "q_interval": self.q_interval,
            "symmetric": self.symmetric,
        }
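A minimal usage sketch for :class:`ConformalQRModel`; unlike :class:`ConformalNaiveModel`, the base model must itself be probabilistic (here via a quantile likelihood). The dataset and hyperparameters are arbitrary.

.. code-block:: python

    from darts.datasets import AirPassengersDataset
    from darts.models import ConformalQRModel, LinearRegressionModel

    series = AirPassengersDataset().load()
    train, _ = series.split_before(0.8)

    # the base model must support probabilistic forecasting
    base_model = LinearRegressionModel(
        lags=12, likelihood="quantile", quantiles=[0.1, 0.5, 0.9]
    ).fit(train)

    model = ConformalQRModel(model=base_model, quantiles=[0.1, 0.5, 0.9], cal_length=36)
    pred = model.predict(n=12, series=train, predict_likelihood_parameters=True, num_samples=1)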
def _get_calibration_hfc_start( series: Sequence[TimeSeries], horizon: int, output_chunk_shift: int, cal_length: Optional[int], cal_stride: int, start: Optional[Union[pd.Timestamp, int, Literal["end"]]], start_format: Literal["position", "value"], ) -> tuple[Optional[Union[int, pd.Timestamp]], Literal["position", "value"]]: """Find the calibration start point (CSP) (for historical forecasts on calibration set). - If `start=None`, the CSP is also `None` (all possible hfcs). - If `start="end"` (when calling `predict()`), returns the CSP as a positional index relative to the end of the series (<0). - Otherwise (when calling `historical_forecasts()`), the CSP is the start value (`start_format="value"`) or start position (`start_format="position"`) adjusted by the positions computed for the case above. If this function is called from `historical_forecasts`, the sanity checks guarantee the following: - `start` cannot be a `float` - when `start_format='value'`, all `series` have the same frequency """ if start is None: return start, start_format cal_start_format: Literal["position", "value"] horizon_ocs = horizon + output_chunk_shift if cal_length is not None: # we only need `cal_length` forecasts with stride `cal_stride` before the `predict()` start point; # the last valid calibration forecast must start at least `horizon_ocs` before `predict()` start add_steps = math.ceil(horizon_ocs / cal_stride) - 1 start_idx_rel = -cal_stride * (cal_length + add_steps) cal_start_format = "position" elif cal_stride > 1: # we need all forecasts with stride `cal_stride` before the `predict()` start point max_len_series = max(len(series_) for series_ in series) start_idx_rel = -cal_stride * math.ceil(max_len_series / cal_stride) cal_start_format = "position" else: # we need all possible forecasts with `cal_stride=1` start_idx_rel, cal_start_format = None, "value" if start == "end": # `predict()` is relative to the end return start_idx_rel, cal_start_format # `historical_forecasts()` is relative to `start` start_is_position = isinstance(start, (int, np.int64)) and ( start_format == "position" or series[0]._has_datetime_index ) cal_start_format = start_format if start_idx_rel is None: cal_start = start_idx_rel elif start_is_position: cal_start = start + start_idx_rel # if start switches sign, it would be relative to the end; # correct it to be positive (relative to beginning) if cal_start < 0 <= start: cal_start += math.ceil(abs(cal_start) / cal_stride) * cal_stride else: cal_start = start + start_idx_rel * series[0].freq return cal_start, cal_start_format
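For illustration, a `predict()`-style call (`start="end"`) with horizon 3, no output shift, `cal_length=5` and `cal_stride=1` yields a calibration start 7 positions before the series end: the last calibration forecast must start `horizon_ocs` steps before the end, and the 5 most recent scores are kept. Since this is an internal helper, the snippet below is only meant to make the arithmetic concrete; the series is a dummy.

.. code-block:: python

    from darts.utils.timeseries_generation import linear_timeseries

    series = [linear_timeseries(length=100)]

    cal_start, cal_start_format = _get_calibration_hfc_start(
        series=series,
        horizon=3,
        output_chunk_shift=0,
        cal_length=5,
        cal_stride=1,
        start="end",
        start_format="value",
    )
    print(cal_start, cal_start_format)  # -7 position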