"""
Source code for darts.utils.data.training_dataset

Training Datasets Base Classes
------------------------------
"""

from abc import ABC, abstractmethod
from typing import Dict, Optional, Tuple

import numpy as np
from torch.utils.data import Dataset

from darts import TimeSeries
from darts.logging import get_logger, raise_log
from darts.utils.data.utils import CovariateType

# module-level logger; it is handed to `raise_log` whenever this module reports an error
logger = get_logger(__name__)
# Positional (start, end) indices returned by `TrainingDataset._memory_indexer()`, in this order:
# (past_start, past_end, future_start, future_end,
#  covariate_start, covariate_end, sample_weight_start, sample_weight_end).
# The covariate and sample weight entries are `None` when the respective series is not used.
SampleIndexType = Tuple[
    int, int, int, int, Optional[int], Optional[int], Optional[int], Optional[int]
]

class TrainingDataset(ABC, Dataset):
    def __init__(self):
        """
        Super-class for all training datasets for torch models in Darts. These include

        * "PastCovariates" datasets (for PastCovariatesTorchModel): containing
          (past_target, past_covariates, static_covariates, future_target)
        * "FutureCovariates" datasets (for FutureCovariatesTorchModel): containing
          (past_target, future_covariates, static_covariates, future_target)
        * "DualCovariates" datasets (for DualCovariatesTorchModel): containing
          (past_target, historic_future_covariates, future_covariates, static_covariates, future_target)
        * "MixedCovariates" datasets (for MixedCovariatesTorchModel): containing
          (past_target, past_covariates, historic_future_covariates, future_covariates, static_covariates,
          future_target)
        * "SplitCovariates" datasets (for SplitCovariatesTorchModel): containing
          (past_target, past_covariates, future_covariates, static_covariates, future_target)

        The covariates are optional and can be `None`.

        This is meant to be used for training (or validation), all data except `future_target` represents model
        inputs (`future_target` is the output the model is trained to predict).

        Darts `TorchForecastingModel`s can be fit from instances of `TrainingDataset` of the right type using the
        `fit_from_dataset()` method.

        `TrainingDataset` inherits torch `Dataset`; meaning that the implementations have to provide the
        `__getitem__()` method.

        It contains `np.ndarray` (and not `TimeSeries`), because training requires the values only,
        and so we can get big performance gains when slicing by returning only numpy views of the data
        underlying the `TimeSeries`.
        """
        # Per-target cache used by `_memory_indexer()`: maps a `target_idx` to the
        # `end_of_output_idx` of the first sample seen for that target and to the
        # (start, end) indices of all its sub sets.
        self._index_memory: Dict = {}

    @abstractmethod
    def __len__(self) -> int:
        pass

    @abstractmethod
    def __getitem__(self, idx: int):
        pass

    @staticmethod
    def _locate_index_range(
        target_time_index,
        lookup_time_index,
        start: int,
        end: int,
        error_prefix: str,
    ) -> Tuple[int, int]:
        """Translates the positional range `[start, end)` of the target index into positions of another index.

        Parameters
        ----------
        target_time_index
            time index of the target series; `start` and `end` are positions within this index.
        lookup_time_index
            time index (of the covariate or sample weight series) in which the range has to be located.
        start
            position of the first time step of the range in `target_time_index`.
        end
            position one past the last time step of the range in `target_time_index`.
        error_prefix
            beginning of the error message that is reported when the range cannot be found.

        Returns
        -------
        Tuple[int, int]
            the (start, end) positions of the range in `lookup_time_index`; `end` is exclusive.

        Raises
        ------
        ValueError
            reported through `raise_log` if the first or the last time step of the range is not contained in
            `lookup_time_index`.
        """
        # we need to be careful with getting ranges and indexes:
        # to get entire range, full_range = ts[:len(ts)]; to get last index: last_idx = ts[len(ts) - 1]
        # extract actual index value (respects datetime- and integer-based indexes; also from non-zero start)
        start_time = target_time_index[start]
        end_time = target_time_index[end - 1]

        if start_time not in lookup_time_index or end_time not in lookup_time_index:
            raise_log(
                ValueError(f"{error_prefix}value range: {start_time} - {end_time}."),
                logger=logger,
            )

        # extract the index position (index) from index value; `+ 1` turns the last position into an
        # exclusive end
        return (
            lookup_time_index.get_loc(start_time),
            lookup_time_index.get_loc(end_time) + 1,
        )

    def _memory_indexer(
        self,
        target_idx: int,
        target_series: TimeSeries,
        shift: int,
        input_chunk_length: int,
        output_chunk_length: int,
        end_of_output_idx: int,
        covariate_series: Optional[TimeSeries] = None,
        covariate_type: CovariateType = CovariateType.NONE,
        sample_weight_series: Optional[TimeSeries] = None,
    ) -> SampleIndexType:
        """Returns the (start, end) indices for past target, future target, covariates and sample weights
        (sub sets) of the current sample `i` from `target_idx`.

        Works for all TimeSeries index types: pd.DatetimeIndex, pd.RangeIndex (and the deprecated Int64Index)

        When `target_idx` is observed for the first time, it stores the position of the sample `0` within the
        full target time series and the (start, end) indices of all sub sets.
        This allows to calculate the sub set indices for all future samples `i` by simply adjusting for the
        difference between the positions of sample `i` and sample `0`.

        Parameters
        ----------
        target_idx
            index of the current target TimeSeries.
        target_series
            current target TimeSeries.
        shift
            The number of time steps by which to shift the output chunks relative to the input chunks.
        input_chunk_length
            The length of the emitted past series.
        output_chunk_length
            The length of the emitted future output series.
        end_of_output_idx
            the index where the output chunk of the current sample ends in `target_series`.
        covariate_series
            current covariate TimeSeries. Must be given whenever `covariate_type` is not `CovariateType.NONE`.
        covariate_type
            the type of covariate to extract. Instance of `CovariateType`. With `CovariateType.NONE` no
            covariate indices are computed; with `CovariateType.FUTURE` the covariates are aligned with the
            output chunk; with any other type (e.g. `CovariateType.PAST`) they are aligned with the input chunk.
        sample_weight_series
            current sample weight TimeSeries. If given, it is aligned with the output chunk.

        Returns
        -------
        SampleIndexType
            `(past_start, past_end, future_start, future_end, covariate_start, covariate_end,
            sample_weight_start, sample_weight_end)`. The covariate and sample weight entries are `None` if
            they were not computed for the first sample of `target_idx`.

        Raises
        ------
        ValueError
            reported through `raise_log` if covariates are requested without a `covariate_series`, or if the
            covariate or sample weight series does not contain the first or last time step of the range that
            is required for the first sample of `target_idx`.
        """
        cached = self._index_memory.get(target_idx)

        if cached is not None:
            # `target_idx` was seen before: load the sub set indices of the initial sample and move all of
            # them by the distance between the current and the initial sample
            idx_shift = end_of_output_idx - cached["end_of_output_idx"]
            past_start, past_end = cached["past_target"]
            future_start, future_end = cached["future_target"]
            covariate_start, covariate_end = cached["covariate"]
            sample_weight_start, sample_weight_end = cached["sample_weight"]
            return (
                past_start + idx_shift,
                past_end + idx_shift,
                future_start + idx_shift,
                future_end + idx_shift,
                None if covariate_start is None else covariate_start + idx_shift,
                None if covariate_end is None else covariate_end + idx_shift,
                None if sample_weight_start is None else sample_weight_start + idx_shift,
                None if sample_weight_end is None else sample_weight_end + idx_shift,
            )

        # the first time target_idx is observed
        start_of_output_idx = end_of_output_idx - output_chunk_length
        start_of_input_idx = start_of_output_idx - shift

        # select forecast point and target period, using the previously computed indexes
        future_start = start_of_output_idx
        future_end = start_of_output_idx + output_chunk_length

        # select input period; look at the `input_chunk_length` points after start of input
        past_start = start_of_input_idx
        past_end = start_of_input_idx + input_chunk_length

        covariate_start, covariate_end = None, None
        if covariate_type is not CovariateType.NONE:
            if covariate_series is None:
                # fail with a clear message instead of an `AttributeError` on `None` below
                raise_log(
                    ValueError(
                        f"Missing covariates; `covariate_series` must be given when `covariate_type` "
                        f"is {covariate_type.value}."
                    ),
                    logger=logger,
                )

            # not CovariateType.FUTURE -> both CovariateType.PAST and CovariateType.HISTORIC_FUTURE
            if covariate_type is CovariateType.FUTURE:
                start, end = future_start, future_end
            else:
                start, end = past_start, past_end

            covariate_start, covariate_end = self._locate_index_range(
                target_series._time_index,
                covariate_series._time_index,
                start,
                end,
                f"Missing covariates; could not find {covariate_type.value} covariates in index ",
            )

        # sample weight
        sample_weight_start, sample_weight_end = None, None
        if sample_weight_series is not None:
            sample_weight_start, sample_weight_end = self._locate_index_range(
                target_series._time_index,
                sample_weight_series._time_index,
                future_start,
                future_end,
                "Missing sample weights; could not find sample weights in index ",
            )

        # store position of initial sample and all relevant sub set indices
        self._index_memory[target_idx] = {
            "end_of_output_idx": end_of_output_idx,
            "past_target": (past_start, past_end),
            "future_target": (future_start, future_end),
            "covariate": (covariate_start, covariate_end),
            "sample_weight": (sample_weight_start, sample_weight_end),
        }

        return (
            past_start,
            past_end,
            future_start,
            future_end,
            covariate_start,
            covariate_end,
            sample_weight_start,
            sample_weight_end,
        )
class PastCovariatesTrainingDataset(TrainingDataset, ABC):
    def __init__(self):
        """
        Abstract class for a PastCovariatesTorchModel training dataset. It contains tuples of
        `(past_target, past_covariate, static_covariates, future_target)` `np.ndarray`.
        The covariates are optional and can be `None`.
        """
        super().__init__()

    # NOTE(review): the annotation below has five entries, i.e. one more optional array than the four
    # names listed in the docstring - presumably the sample weight; confirm its position against the
    # concrete dataset implementations.
    @abstractmethod
    def __getitem__(
        self, idx: int
    ) -> Tuple[
        np.ndarray,
        Optional[np.ndarray],
        Optional[np.ndarray],
        Optional[np.ndarray],
        np.ndarray,
    ]:
        pass
class FutureCovariatesTrainingDataset(TrainingDataset, ABC):
    def __init__(self):
        """
        Abstract class for a FutureCovariatesTorchModel training dataset. It contains tuples of
        `(past_target, future_covariate, static_covariates, future_target)` `np.ndarray`.
        The covariates are optional and can be `None`.
        """
        super().__init__()

    # NOTE(review): the annotation below has five entries, i.e. one more optional array than the four
    # names listed in the docstring - presumably the sample weight; confirm its position against the
    # concrete dataset implementations.
    @abstractmethod
    def __getitem__(
        self, idx: int
    ) -> Tuple[
        np.ndarray,
        Optional[np.ndarray],
        Optional[np.ndarray],
        Optional[np.ndarray],
        np.ndarray,
    ]:
        pass
class DualCovariatesTrainingDataset(TrainingDataset, ABC):
    def __init__(self):
        """
        Abstract class for a DualCovariatesTorchModel training dataset. It contains tuples of
        `(past_target, historic_future_covariates, future_covariates, static_covariates, future_target)`
        `np.ndarray`.
        The covariates are optional and can be `None`.
        """
        super().__init__()

    # NOTE(review): the annotation below has six entries, i.e. one more optional array than the five
    # names listed in the docstring - presumably the sample weight; confirm its position against the
    # concrete dataset implementations.
    @abstractmethod
    def __getitem__(
        self, idx: int
    ) -> Tuple[
        np.ndarray,
        Optional[np.ndarray],
        Optional[np.ndarray],
        Optional[np.ndarray],
        Optional[np.ndarray],
        np.ndarray,
    ]:
        pass
class MixedCovariatesTrainingDataset(TrainingDataset, ABC):
    def __init__(self):
        """
        Abstract class for a MixedCovariatesTorchModel training dataset. It contains tuples of
        `(past_target, past_covariates, historic_future_covariates, future_covariates, static_covariates,
        future_target)` `np.ndarray`.
        The covariates are optional and can be `None`.
        """
        super().__init__()

    # NOTE(review): the annotation below has seven entries, i.e. one more optional array than the six
    # names listed in the docstring - presumably the sample weight; confirm its position against the
    # concrete dataset implementations.
    @abstractmethod
    def __getitem__(
        self, idx: int
    ) -> Tuple[
        np.ndarray,
        Optional[np.ndarray],
        Optional[np.ndarray],
        Optional[np.ndarray],
        Optional[np.ndarray],
        Optional[np.ndarray],
        np.ndarray,
    ]:
        pass
class SplitCovariatesTrainingDataset(TrainingDataset, ABC):
    def __init__(self):
        """
        Abstract class for a SplitCovariatesTorchModel training dataset. It contains tuples of
        `(past_target, past_covariates, future_covariates, static_covariates, future_target)`
        `np.ndarray`.
        The covariates are optional and can be `None`.
        """
        super().__init__()

    # NOTE(review): the annotation below has six entries, i.e. one more optional array than the five
    # names listed in the docstring - presumably the sample weight; confirm its position against the
    # concrete dataset implementations.
    @abstractmethod
    def __getitem__(
        self, idx: int
    ) -> Tuple[
        np.ndarray,
        Optional[np.ndarray],
        Optional[np.ndarray],
        Optional[np.ndarray],
        Optional[np.ndarray],
        np.ndarray,
    ]:
        pass