"""
XGBoost Model
-------------
Regression model based on XGBoost.
This implementation comes with the ability to produce probabilistic forecasts.
"""
from functools import partial
from typing import List, Optional, Sequence, Union
import numpy as np
import xgboost as xgb
from darts.logging import get_logger, raise_if_not
from darts.models.forecasting.regression_model import (
FUTURE_LAGS_TYPE,
LAGS_TYPE,
RegressionModel,
_LikelihoodMixin,
)
from darts.timeseries import TimeSeries
logger = get_logger(__name__)
# Check whether we are running xgboost >= 2.0.0 for quantile regression
tokens = xgb.__version__.split(".")
xgb_200_or_above = int(tokens[0]) >= 2
[docs]def xgb_quantile_loss(labels: np.ndarray, preds: np.ndarray, quantile: float):
"""Custom loss function for XGBoost to compute quantile loss gradient.
Inspired from: https://gist.github.com/Nikolay-Lysenko/06769d701c1d9c9acb9a66f2f9d7a6c7
This computes the gradient of the pinball loss between predictions and target labels.
"""
raise_if_not(0 <= quantile <= 1, "Quantile must be between 0 and 1.", logger)
errors = preds - labels
left_mask = errors < 0
right_mask = errors > 0
grad = -quantile * left_mask + (1 - quantile) * right_mask
hess = np.ones_like(preds)
return grad, hess
[docs]class XGBModel(RegressionModel, _LikelihoodMixin):
def __init__(
self,
lags: Optional[LAGS_TYPE] = None,
lags_past_covariates: Optional[LAGS_TYPE] = None,
lags_future_covariates: Optional[FUTURE_LAGS_TYPE] = None,
output_chunk_length: int = 1,
output_chunk_shift: int = 0,
add_encoders: Optional[dict] = None,
likelihood: Optional[str] = None,
quantiles: Optional[List[float]] = None,
random_state: Optional[int] = None,
multi_models: Optional[bool] = True,
use_static_covariates: bool = True,
**kwargs,
):
"""XGBoost Model
Parameters
----------
lags
Lagged target `series` values used to predict the next time step/s.
If an integer, must be > 0. Uses the last `n=lags` past lags; e.g. `(-1, -2, ..., -lags)`, where `0`
corresponds the first predicted time step of each sample. If `output_chunk_shift > 0`, then
lag `-1` translates to `-1 - output_chunk_shift` steps before the first prediction step.
If a list of integers, each value must be < 0. Uses only the specified values as lags.
If a dictionary, the keys correspond to the `series` component names (of the first series when
using multiple series) and the values correspond to the component lags (integer or list of integers). The
key 'default_lags' can be used to provide default lags for un-specified components. Raises and error if some
components are missing and the 'default_lags' key is not provided.
lags_past_covariates
Lagged `past_covariates` values used to predict the next time step/s.
If an integer, must be > 0. Uses the last `n=lags_past_covariates` past lags; e.g. `(-1, -2, ..., -lags)`,
where `0` corresponds to the first predicted time step of each sample. If `output_chunk_shift > 0`, then
lag `-1` translates to `-1 - output_chunk_shift` steps before the first prediction step.
If a list of integers, each value must be < 0. Uses only the specified values as lags.
If a dictionary, the keys correspond to the `past_covariates` component names (of the first series when
using multiple series) and the values correspond to the component lags (integer or list of integers). The
key 'default_lags' can be used to provide default lags for un-specified components. Raises and error if some
components are missing and the 'default_lags' key is not provided.
lags_future_covariates
Lagged `future_covariates` values used to predict the next time step/s. The lags are always relative to the
first step in the output chunk, even when `output_chunk_shift > 0`.
If a tuple of `(past, future)`, both values must be > 0. Uses the last `n=past` past lags and `n=future`
future lags; e.g. `(-past, -(past - 1), ..., -1, 0, 1, .... future - 1)`, where `0` corresponds the first
predicted time step of each sample. If `output_chunk_shift > 0`, the position of negative lags differ from
those of `lags` and `lags_past_covariates`. In this case a future lag `-5` would point at the same
step as a target lag of `-5 + output_chunk_shift`.
If a list of integers, uses only the specified values as lags.
If a dictionary, the keys correspond to the `future_covariates` component names (of the first series when
using multiple series) and the values correspond to the component lags (tuple or list of integers). The key
'default_lags' can be used to provide default lags for un-specified components. Raises and error if some
components are missing and the 'default_lags' key is not provided.
output_chunk_length
Number of time steps predicted at once (per chunk) by the internal model. It is not the same as forecast
horizon `n` used in `predict()`, which is the desired number of prediction points generated using a
one-shot- or autoregressive forecast. Setting `n <= output_chunk_length` prevents auto-regression. This is
useful when the covariates don't extend far enough into the future, or to prohibit the model from using
future values of past and / or future covariates for prediction (depending on the model's covariate
support).
output_chunk_shift
Optionally, the number of steps to shift the start of the output chunk into the future (relative to the
input chunk end). This will create a gap between the input (history of target and past covariates) and
output. If the model supports `future_covariates`, the `lags_future_covariates` are relative to the first
step in the shifted output chunk. Predictions will start `output_chunk_shift` steps after the end of the
target `series`. If `output_chunk_shift` is set, the model cannot generate autoregressive predictions
(`n > output_chunk_length`).
add_encoders
A large number of past and future covariates can be automatically generated with `add_encoders`.
This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
will be used as index encoders. Additionally, a transformer such as Darts' :class:`Scaler` can be added to
transform the generated covariates. This happens all under one hood and only needs to be specified at
model creation.
Read :meth:`SequentialEncoder <darts.dataprocessing.encoders.SequentialEncoder>` to find out more about
``add_encoders``. Default: ``None``. An example showing some of ``add_encoders`` features:
.. highlight:: python
.. code-block:: python
def encode_year(idx):
return (idx.year - 1950) / 50
add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['relative'], 'future': ['relative']},
'custom': {'past': [encode_year]},
'transformer': Scaler(),
'tz': 'CET'
}
..
likelihood
Can be set to `poisson` or `quantile`. If set, the model will be probabilistic, allowing sampling at
prediction time. This will overwrite any `objective` parameter.
quantiles
Fit the model to these quantiles if the `likelihood` is set to `quantile`.
random_state
Control the randomness in the fitting procedure and for sampling.
Default: ``None``.
multi_models
If True, a separate model will be trained for each future lag to predict. If False, a single model is
trained to predict at step 'output_chunk_length' in the future. Default: True.
use_static_covariates
Whether the model should use static covariate information in case the input `series` passed to ``fit()``
contain static covariates. If ``True``, and static covariates are available at fitting time, will enforce
that all target `series` have the same static covariate dimensionality in ``fit()`` and ``predict()``.
**kwargs
Additional keyword arguments passed to `xgb.XGBRegressor`.
Examples
--------
Deterministic forecasting, using past/future covariates (optional)
>>> from darts.datasets import WeatherDataset
>>> from darts.models import XGBModel
>>> series = WeatherDataset().load()
>>> # predicting atmospheric pressure
>>> target = series['p (mbar)'][:100]
>>> # optionally, use past observed rainfall (pretending to be unknown beyond index 100)
>>> past_cov = series['rain (mm)'][:100]
>>> # optionally, use future temperatures (pretending this component is a forecast)
>>> future_cov = series['T (degC)'][:106]
>>> # predict 6 pressure values using the 12 past values of pressure and rainfall, as well as the 6 temperature
>>> # values corresponding to the forecasted period
>>> model = XGBModel(
>>> lags=12,
>>> lags_past_covariates=12,
>>> lags_future_covariates=[0,1,2,3,4,5],
>>> output_chunk_length=6,
>>> )
>>> model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
>>> pred = model.predict(6)
>>> pred.values()
array([[1005.9185 ],
[1005.8315 ],
[1005.7878 ],
[1005.72626],
[1005.7475 ],
[1005.76074]])
"""
kwargs["random_state"] = random_state # seed for tree learner
self.kwargs = kwargs
self._median_idx = None
self._model_container = None
self.quantiles = None
self.likelihood = likelihood
self._rng = None
# parse likelihood
available_likelihoods = ["poisson", "quantile"] # to be extended
if likelihood is not None:
self._check_likelihood(likelihood, available_likelihoods)
if likelihood in {"poisson"}:
self.kwargs["objective"] = f"count:{likelihood}"
elif likelihood == "quantile":
if xgb_200_or_above:
# leverage built-in Quantile Regression
self.kwargs["objective"] = "reg:quantileerror"
self.quantiles, self._median_idx = self._prepare_quantiles(quantiles)
self._model_container = self._get_model_container()
self._rng = np.random.default_rng(seed=random_state) # seed for sampling
super().__init__(
lags=lags,
lags_past_covariates=lags_past_covariates,
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
output_chunk_shift=output_chunk_shift,
add_encoders=add_encoders,
multi_models=multi_models,
model=xgb.XGBRegressor(**self.kwargs),
use_static_covariates=use_static_covariates,
)
[docs] def fit(
self,
series: Union[TimeSeries, Sequence[TimeSeries]],
past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
val_series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
val_past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
val_future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
max_samples_per_ts: Optional[int] = None,
**kwargs,
):
"""
Fits/trains the model using the provided list of features time series and the target time series.
Parameters
----------
series
TimeSeries or Sequence[TimeSeries] object containing the target values.
past_covariates
Optionally, a series or sequence of series specifying past-observed covariates
future_covariates
Optionally, a series or sequence of series specifying future-known covariates
val_series
TimeSeries or Sequence[TimeSeries] object containing the target values for evaluation dataset
val_past_covariates
Optionally, a series or sequence of series specifying past-observed covariates for evaluation dataset
val_future_covariates :
Optionally, a series or sequence of series specifying future-known covariates for evaluation dataset
max_samples_per_ts
This is an integer upper bound on the number of tuples that can be produced
per time series. It can be used in order to have an upper bound on the total size of the dataset and
ensure proper sampling. If `None`, it will read all of the individual time series in advance (at dataset
creation) to know their sizes, which might be expensive on big datasets.
If some series turn out to have a length that would allow more than `max_samples_per_ts`, only the
most recent `max_samples_per_ts` samples will be considered.
**kwargs
Additional kwargs passed to `xgb.XGBRegressor.fit()`
"""
if val_series is not None:
# Note: we create a list here as it's what's expected by XGBRegressor.fit()
# This is handled as a separate case in multioutput.py
kwargs["eval_set"] = [
self._create_lagged_data(
target_series=val_series,
past_covariates=val_past_covariates,
future_covariates=val_future_covariates,
max_samples_per_ts=max_samples_per_ts,
)
]
# TODO: XGBRegressor supports multi quantile reqression which we could leverage in the future
# see https://xgboost.readthedocs.io/en/latest/python/examples/quantile_regression.html
if self.likelihood == "quantile":
# empty model container in case of multiple calls to fit, e.g. when backtesting
self._model_container.clear()
for quantile in self.quantiles:
if xgb_200_or_above:
self.kwargs["quantile_alpha"] = quantile
else:
objective = partial(xgb_quantile_loss, quantile=quantile)
self.kwargs["objective"] = objective
self.model = xgb.XGBRegressor(**self.kwargs)
super().fit(
series=series,
past_covariates=past_covariates,
future_covariates=future_covariates,
max_samples_per_ts=max_samples_per_ts,
**kwargs,
)
self._model_container[quantile] = self.model
return self
super().fit(
series=series,
past_covariates=past_covariates,
future_covariates=future_covariates,
max_samples_per_ts=max_samples_per_ts,
**kwargs,
)
return self
def _predict_and_sample(
self,
x: np.ndarray,
num_samples: int,
predict_likelihood_parameters: bool,
**kwargs,
) -> np.ndarray:
"""Override of RegressionModel's predict method to allow for the probabilistic case"""
if self.likelihood is not None:
return self._predict_and_sample_likelihood(
x, num_samples, self.likelihood, predict_likelihood_parameters, **kwargs
)
else:
return super()._predict_and_sample(
x, num_samples, predict_likelihood_parameters, **kwargs
)
@property
def supports_probabilistic_prediction(self) -> bool:
return self.likelihood is not None
@property
def min_train_series_length(self) -> int:
# XGBModel requires a minimum of 2 training samples,
# therefore the min_train_series_length should be one
# more than for other regression models
return max(
3,
(
-self.lags["target"][0] + self.output_chunk_length + 1
if "target" in self.lags
else self.output_chunk_length
),
)