Source code for darts.models.forecasting.arima

"""
ARIMA
-----

Models for ARIMA (Autoregressive integrated moving average) [1]_.
The implementation is wrapped around `statsmodels <https://github.com/statsmodels/statsmodels>`_.

References
----------
.. [1] https://wikipedia.org/wiki/Autoregressive_integrated_moving_average
"""

from typing import List, Literal, Optional, Sequence, Tuple, Union

try:
    from typing import TypeAlias
except ImportError:
    from typing_extensions import TypeAlias

import numpy as np
from statsmodels import __version_tuple__ as statsmodels_version
from statsmodels.tsa.arima.model import ARIMA as staARIMA

from darts.logging import get_logger
from darts.models.forecasting.forecasting_model import (
    TransferableFutureCovariatesLocalForecastingModel,
)
from darts.timeseries import TimeSeries

# Module-level logger, named after this module.
logger = get_logger(__name__)

# True when the installed statsmodels version tuple compares strictly greater
# than (0, 13, 5). The flag decides whether ARIMA keeps its own
# `np.random.RandomState` or falls back to seeding numpy's global RNG.
# NOTE(review): this comment used to say ">= 0.13.5", but the comparison below
# is strict (`>`), so exactly 0.13.5 yields False — confirm which is intended.
statsmodels_above_0135 = statsmodels_version > (0, 13, 5)


# Alias for parameters that accept either a single int or a sequence of ints.
IntOrIntSequence: TypeAlias = Union[int, Sequence[int]]


class ARIMA(TransferableFutureCovariatesLocalForecastingModel):
    def __init__(
        self,
        p: IntOrIntSequence = 12,
        d: int = 1,
        q: IntOrIntSequence = 0,
        seasonal_order: Tuple[IntOrIntSequence, int, IntOrIntSequence, int] = (
            0,
            0,
            0,
            0,
        ),
        trend: Optional[Union[Literal["n", "c", "t", "ct"], List[int]]] = None,
        random_state: Optional[int] = None,
        add_encoders: Optional[dict] = None,
    ):
        """ARIMA
        ARIMA-type models extensible with exogenous variables (future covariates)
        and seasonal components.

        Parameters
        ----------
        p : int | Sequence[int]
            Order (number of time lags) of the autoregressive model (AR). If a sequence of integers, specifies the
            exact lags to include.
        d : int
            The order of differentiation; i.e., the number of times the data
            have had past values subtracted (I).
        q : int | Sequence[int]
            The size of the moving average window (MA). If a sequence of integers, specifies the exact lags to
            include in the window.
        seasonal_order: Tuple[int | Sequence[int], int, int | Sequence[int], int]
            The (P,D,Q,s) order of the seasonal component for the AR parameters (P),
            differences (D), MA parameters (Q) and periodicity (s). D and s are always integers,
            while P and Q may either be integers or sequence of positive integers
            specifying exactly which lag orders are included.
        trend: Literal['n', 'c', 't', 'ct'] | list[int], optional
            Parameter controlling the deterministic trend. Either a string or list of integers.
            If a string, can be 'n' for no trend, 'c' for a constant term, 't' for a linear trend in time,
            and 'ct' for a constant term and linear trend.
            If a list of integers, defines a polynomial according to `numpy.poly1d` [1]_. E.g., `[1,1,0,1]` would
            translate to :math:`a + bt + ct^3`.
            Trend terms of lower order than `d + D` cannot be included, as they would be eliminated by the
            differencing operation.
            Default is 'c' for models without integration, and 'n' for models with integration.
        random_state : int, optional
            Seed for the random number generation used when sampling forecasts (``num_samples > 1``).
            When the installed statsmodels version tuple is greater than ``(0, 13, 5)``, a dedicated
            ``numpy.random.RandomState`` is created from the seed (or no generator at all if ``None``).
            Otherwise numpy's global random generator is seeded with this value, or with ``0`` if ``None``.
        add_encoders
            A large number of future covariates can be automatically generated with `add_encoders`.
            This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
            will be used as index encoders. Additionally, a transformer such as Darts' :class:`Scaler` can be added to
            transform the generated covariates. This happens all under one hood and only needs to be specified at
            model creation.
            Read :meth:`SequentialEncoder <darts.dataprocessing.encoders.SequentialEncoder>` to find out more about
            ``add_encoders``. Default: ``None``. An example showing some of ``add_encoders`` features:

            .. highlight:: python
            .. code-block:: python

                def encode_year(idx):
                    return (idx.year - 1950) / 50

                add_encoders={
                    'cyclic': {'future': ['month']},
                    'datetime_attribute': {'future': ['hour', 'dayofweek']},
                    'position': {'future': ['relative']},
                    'custom': {'future': [encode_year]},
                    'transformer': Scaler(),
                    'tz': 'CET'
                }
            ..

        Examples
        --------
        >>> from darts.datasets import AirPassengersDataset
        >>> from darts.models import ARIMA
        >>> from darts.utils.timeseries_generation import datetime_attribute_timeseries
        >>> series = AirPassengersDataset().load()
        >>> # optionally, use some future covariates; e.g. the value of the month encoded as a sine and cosine series
        >>> future_cov = datetime_attribute_timeseries(series, "month", cyclic=True, add_length=6)
        >>> # define ARIMA parameters
        >>> model = ARIMA(p=12, d=1, q=2)
        >>> model.fit(series, future_covariates=future_cov)
        >>> pred = model.predict(6, future_covariates=future_cov)
        >>> pred.values()
        array([[451.36489334],
               [416.88972829],
               [443.10520391],
               [481.07892911],
               [502.11286509],
               [555.50153984]])

        References
        ----------
        .. [1] https://numpy.org/doc/stable/reference/generated/numpy.poly1d.html
        """
        super().__init__(add_encoders=add_encoders)
        self.order = p, d, q
        self.seasonal_order = seasonal_order
        self.trend = trend
        # The fitted statsmodels results object; populated by `_fit`.
        self.model = None
        if statsmodels_above_0135:
            # A dedicated generator is handed to `simulate()` at prediction time.
            self._random_state = (
                None if random_state is None else np.random.RandomState(random_state)
            )
        else:
            # No per-call generator is used here; seed numpy's global RNG
            # instead (falling back to seed 0 when none is given).
            self._random_state = None
            np.random.seed(random_state if random_state is not None else 0)

    @staticmethod
    def _exog_values(covariates: Optional[TimeSeries]):
        """Return the covariates' value array (no copy), or ``None`` if they are missing/falsy."""
        return covariates.values(copy=False) if covariates else None

    @property
    def supports_multivariate(self) -> bool:
        """This model only accepts univariate target series."""
        return False

    def _fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = None):
        """Fit a statsmodels ARIMA on ``series`` with optional exogenous ``future_covariates``.

        The covariates are remembered so that `_predict` can restore the fitted
        state after temporarily applying the model to another series.

        Returns
        -------
        self
            The fitted model.
        """
        super()._fit(series, future_covariates)

        self._assert_univariate(series)

        # storing to restore the statsmodels model results object
        self.training_historic_future_covariates = future_covariates

        m = staARIMA(
            series.values(copy=False),
            exog=self._exog_values(future_covariates),
            order=self.order,
            seasonal_order=self.seasonal_order,
            trend=self.trend,
        )
        self.model = m.fit()

        return self

    def _predict(
        self,
        n: int,
        series: Optional[TimeSeries] = None,
        historic_future_covariates: Optional[TimeSeries] = None,
        future_covariates: Optional[TimeSeries] = None,
        num_samples: int = 1,
        verbose: bool = False,
    ) -> TimeSeries:
        """Forecast ``n`` steps, optionally starting from a ``series`` other than the training one.

        Parameters
        ----------
        n
            Number of steps to forecast.
        series
            If given, the fitted results object is temporarily applied to this
            series (with ``historic_future_covariates`` as exogenous data) before
            forecasting, and restored to the training data afterwards.
        historic_future_covariates
            Exogenous values aligned with ``series``; only used when ``series`` is given.
        future_covariates
            Exogenous values for the forecast horizon.
        num_samples
            ``1`` produces a point forecast via ``forecast()``; larger values draw
            that many simulated paths via ``simulate()``.
        verbose
            Accepted for interface compatibility; not used in this method.

        Returns
        -------
        TimeSeries
            The forecast built by ``_build_forecast_series``.
        """
        if num_samples > 1 and self.trend:
            logger.warning(
                "Trends are not well supported yet for getting probabilistic forecasts with ARIMA. "
                "If you run into issues, try calling predict() with num_samples=1 or removing the trend from "
                "your model."
            )

        super()._predict(
            n, series, historic_future_covariates, future_covariates, num_samples
        )

        # updating statsmodels results object state with the new ts and covariates
        if series is not None:
            self.model = self.model.apply(
                series.values(copy=False),
                exog=self._exog_values(historic_future_covariates),
            )

        try:
            future_exog = self._exog_values(future_covariates)
            if num_samples == 1:
                forecast = self.model.forecast(steps=n, exog=future_exog)
            else:
                forecast = self.model.simulate(
                    nsimulations=n,
                    repetitions=num_samples,
                    initial_state=self.model.states.predicted[-1, :],
                    random_state=self._random_state,
                    anchor="end",
                    exog=future_exog,
                )
        finally:
            # restoring statsmodels results object state; done in `finally` so a
            # failed forecast does not leave the model bound to the foreign series.
            # `_orig_training_series` is presumably stored by the base class
            # during fit — it is not set anywhere in this class.
            if series is not None:
                self.model = self.model.apply(
                    self._orig_training_series.values(copy=False),
                    exog=self._exog_values(self.training_historic_future_covariates),
                )

        return self._build_forecast_series(forecast)

    @property
    def supports_probabilistic_prediction(self) -> bool:
        """Sampled forecasts are available (through ``simulate()`` in `_predict`)."""
        return True

    @property
    def min_train_series_length(self) -> int:
        """Minimum number of training points accepted by this model."""
        return 30