# Source code for darts.explainability.utils

from typing import List, Optional, Sequence, Tuple, Union

from darts import TimeSeries
from darts.logging import get_logger, raise_if, raise_if_not, raise_log
from darts.models.forecasting.forecasting_model import ForecastingModel
from darts.utils.statistics import stationarity_tests
from darts.utils.ts_utils import series2seq

logger = get_logger(__name__)


def process_input(
    model: ForecastingModel,
    input_type: str,
    series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    fallback_series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    fallback_past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    fallback_future_covariates: Optional[
        Union[TimeSeries, Sequence[TimeSeries]]
    ] = None,
    check_component_names: bool = False,
    requires_input: bool = True,
    requires_covariates_encoding: bool = False,
    test_stationarity: bool = False,
):
    """Helper function to process and check either of the background or foreground series input to
    `_ForecastingModelExplainer`.

    If no input was provided (`series`, `past/future_covariates`), the fallback will be used for downstream
    tasks. Will raise an error if both input and fallback are not available.

    The fallback is dependent on the input type ("background" or "foreground"):

    - for background `input_type`: fallback are the series saved in fitted forecasting model
    - for foreground `input_type`: fallback are the background series from `_ForecastingModelExplainer`

    Parameters
    ----------
    model
        any `ForecastingModel`.
    input_type
        the type of input series. Either "background" or "foreground"
    series
        Optionally, one or a sequence of target `TimeSeries`.
    past_covariates
        Optionally, one or a sequence of past covariates `TimeSeries`.
    future_covariates
        Optionally, one or a sequence of future covariates `TimeSeries`.
    fallback_series
        Optionally, one or a sequence of target `TimeSeries` to fall back to in case `series` was not provided.
    fallback_past_covariates
        Optionally, one or a sequence of past covariates `TimeSeries` to fall back to in case `past_covariates`
        was not provided.
    fallback_future_covariates
        Optionally, one or a sequence of future covariates `TimeSeries` to fall back to in case
        `future_covariates` was not provided.
    check_component_names
        Whether to enforce that, in the case of multiple time series, all series of the same type (target or
        *_covariates) must have the same component names.
    requires_input
        Whether the input is required. If `True`, raises an error if no input was provided.
    requires_covariates_encoding
        Whether to apply the model's encoders to the input covariates. This should only be `True` if the
        Explainer will not call model methods `fit()` or `predict()` directly.
    test_stationarity
        Whether to raise a warning if not all components from the target `series` are stationary.

    Returns
    -------
    tuple
        `(series, past_covariates, future_covariates, target_components, static_covariates_components,
        past_covariates_components, future_covariates_components)`. The three series entries are the
        outputs of `series2seq`; the covariates are `None` when unavailable or when they consisted only
        of encoder-generated components that were removed. The component names are those returned by
        `get_component_names` *before* any encodings are removed.

    Raises
    ------
    ValueError
        If `input_type` is unknown, or if `series` is required but neither `series` nor `fallback_series`
        is available. Further validation errors are raised by `raise_if` and `_check_valid_input`.
    """
    if input_type not in ["background", "foreground"]:
        raise_log(
            ValueError(
                f"Unknown `input_type='{input_type}'`. Must be one of ['background', 'foreground']."
            ),
            logger,
        )

    # if any input is given, treat it as if the input was required
    if (
        series is not None
        or past_covariates is not None
        or future_covariates is not None
    ):
        requires_input = True

    if series is None:
        # if `series` was not passed, use the fallback input
        # - for background input type: fallback are the series saved in fitted forecasting model
        # - for foreground input type: fallback are the background series from `_ForecastingModelExplainer`
        raise_if(
            (past_covariates is not None) or (future_covariates is not None),
            f"Supplied {input_type} past or future covariates but no {input_type} series. Please also provide "
            f"`{input_type}_series`.",
            logger,
        )
        if requires_input and fallback_series is None:
            error_msg = (
                "`model` was fit on multiple time series."
                if input_type == "background"
                else "no `background_series` was provided at `Explainer` creation"
            )
            raise_log(
                ValueError(f"`{input_type}_series` must be provided if {error_msg}"),
                logger,
            )
        series = fallback_series
        past_covariates = fallback_past_covariates
        future_covariates = fallback_future_covariates
    elif model.encoders.encoding_available:
        # otherwise use the passed input, and generate the covariate encodings (they will be removed
        # again later on if `requires_covariates_encoding=False`)
        past_covariates, future_covariates = model.generate_fit_encodings(
            series=series,
            past_covariates=past_covariates,
            future_covariates=future_covariates,
        )

    series = series2seq(series)
    past_covariates = series2seq(past_covariates)
    future_covariates = series2seq(future_covariates)

    (
        target_components,
        static_covariates_components,
        past_covariates_components,
        future_covariates_components,
    ) = get_component_names(
        series=series,
        past_covariates=past_covariates,
        future_covariates=future_covariates,
    )

    _check_valid_input(
        model,
        input_type,
        series,
        past_covariates,
        future_covariates,
        target_components,
        past_covariates_components,
        future_covariates_components,
        check_component_names=check_component_names,
        requires_input=requires_input,
        test_stationarity=test_stationarity,
    )

    # make sure to remove any encodings from covariates if downstream tasks require covariates without encodings
    if not requires_covariates_encoding and model.encoders.encoding_available:
        if past_covariates is not None and model.encoders.past_encoders:
            past_covariates = _drop_encoded_components(
                past_covariates, model.encoders.past_components
            )
        if future_covariates is not None and model.encoders.future_encoders:
            future_covariates = _drop_encoded_components(
                future_covariates, model.encoders.future_components
            )

    return (
        series,
        past_covariates,
        future_covariates,
        target_components,
        static_covariates_components,
        past_covariates_components,
        future_covariates_components,
    )


def _drop_encoded_components(covariates, encoded_components):
    """Remove the encoder-generated components from every series in `covariates`.

    The components to drop are determined from the first series only and then applied to all series.
    Returns `covariates` unchanged if none of its components are encoded, and `None` if all of them are.
    """
    first = covariates[0]
    drop_cols = first.components[first.components.isin(encoded_components)]
    if drop_cols.empty:
        return covariates
    if len(drop_cols) == first.n_components:
        # nothing but encodings: there are no user-supplied covariates left
        return None
    return [cov[cov.components.drop(drop_cols).tolist()] for cov in covariates]
def process_horizons_and_targets(
    horizons: Optional[Union[int, Sequence[int]]] = None,
    fallback_horizon: Optional[int] = None,
    target_components: Optional[Union[str, Sequence[str]]] = None,
    fallback_target_components: Optional[Sequence[str]] = None,
    check_component_names: bool = False,
) -> Tuple[Sequence[int], Sequence[str]]:
    """Processes the input horizons and target component names.

    Parameters
    ----------
    horizons
        Optionally, an integer or sequence of integers representing the future time steps to be explained.
        `1` corresponds to the first timestamp being forecasted. All values must be `<=output_chunk_length` of
        the explained forecasting model.
    fallback_horizon
        Optionally, a horizon to fall back to in case `horizons` was not provided.
    target_components
        Optionally, a string or sequence of strings with the target components to explain.
    fallback_target_components
        Optionally, a sequence of strings to fall back to in case `target_components` was not provided.
    check_component_names
        Whether to enforce that the target components are in `fallback_target_components`.

    Returns
    -------
    Tuple[Sequence[int], Sequence[str]]
        The horizons (a list if a single integer was given, `range(1, fallback_horizon + 1)` if `horizons`
        was not provided) and the target components (a list if a single string was given,
        `fallback_target_components` if `target_components` was not provided).

    Raises
    ------
    ValueError
        If neither `horizons` nor `fallback_horizon` is provided. Invalid target components, empty
        `horizons`, or horizons outside of `[1, fallback_horizon]` are reported through `raise_if`.
    """
    if target_components is None:
        target_components = fallback_target_components
    else:
        if isinstance(target_components, str):
            target_components = [target_components]
        if check_component_names and fallback_target_components is not None:
            invalid_components = [
                target_name
                for target_name in target_components
                if target_name not in fallback_target_components
            ]
            raise_if(
                len(invalid_components) > 0,
                "Invalid `target_components`. The following components are not in the components of the "
                f"`background_series`: {invalid_components}. Provide some valid components from: "
                f"{fallback_target_components}.",
                logger,
            )

    if horizons is None:
        if fallback_horizon is None:
            # without this guard `fallback_horizon + 1` fails with an opaque TypeError
            raise_log(
                ValueError(
                    "Either `horizons` or `fallback_horizon` must be provided."
                ),
                logger,
            )
        horizons = range(1, fallback_horizon + 1)
    else:
        if isinstance(horizons, int):
            horizons = [horizons]
        # `max()` / `min()` below would otherwise fail with a cryptic message on empty input
        raise_if(len(horizons) == 0, "`horizons` must not be empty.", logger)
        if fallback_horizon is not None:
            raise_if(
                max(horizons) > fallback_horizon,
                "At least one of the `horizons` is larger than `output_chunk_length`.",
                logger,
            )
        raise_if(min(horizons) < 1, "All `horizons` must be `>=1`.", logger)

    return horizons, target_components
def get_component_names(
    series: Sequence[TimeSeries],
    past_covariates: Optional[Sequence[TimeSeries]] = None,
    future_covariates: Optional[Sequence[TimeSeries]] = None,
    idx: int = 0,
) -> Tuple[List[str], Optional[List[str]], Optional[List[str]], Optional[List[str]]]:
    """Extract and return the components of target series, static covariate, past and future covariates series.

    Parameters
    ----------
    series
        A sequence of target `TimeSeries`.
    past_covariates
        Optionally, a sequence of past covariates `TimeSeries`.
    future_covariates
        Optionally, a sequence of future covariates `TimeSeries`.
    idx
        the index of the input sequences to extract the components from.

    Returns
    -------
    Tuple[List[str], Optional[List[str]], Optional[List[str]], Optional[List[str]]]
        The component names of the target series, the static covariates (column names), the past
        covariates and the future covariates, all taken from position `idx`. Each of the three
        covariate entries is `None` when the corresponding input is missing or has no components.
    """
    target = series[idx]
    target_components = target.components.tolist()

    static_covariates = target.static_covariates

    # `or None` normalizes an empty list of names to `None` (not available)
    sc_components = (
        static_covariates.columns.tolist() or None
        if static_covariates is not None
        else None
    )
    pc_components = (
        past_covariates[idx].components.tolist() or None
        if past_covariates is not None
        else None
    )
    fc_components = (
        future_covariates[idx].components.tolist() or None
        if future_covariates is not None
        else None
    )

    return target_components, sc_components, pc_components, fc_components
def _check_valid_input(
    model,
    input_type: str,
    series: Sequence[TimeSeries],
    past_covariates: Optional[Sequence[TimeSeries]],
    future_covariates: Optional[Sequence[TimeSeries]],
    target_components: Optional[List[str]],
    past_covariates_components: Optional[List[str]],
    future_covariates_components: Optional[List[str]],
    check_component_names: bool = False,
    requires_input: bool = False,
    test_stationarity: bool = False,
):
    """Checks that the input is valid.

    Parameters
    ----------
    model
        The forecasting model; its `uses_past_covariates` / `uses_future_covariates` attributes are read
        when `requires_input=True`.
    input_type
        Either "background" or "foreground". Also used in the error messages.
    series
        A sequence of target `TimeSeries`.
    past_covariates
        Optionally, a sequence of past covariates `TimeSeries`.
    future_covariates
        Optionally, a sequence of future covariates `TimeSeries`.
    target_components
        The expected component names of every target series.
    past_covariates_components
        The expected component names of every past covariates series.
    future_covariates_components
        The expected component names of every future covariates series.
    check_component_names
        Whether to enforce that all series of the same type have the expected component names.
    requires_input
        Whether to raise an error if the model uses past/future covariates but none are given.
    test_stationarity
        Whether to log a warning if not all components of the target `series` are stationary.
    """
    if test_stationarity and series is not None:
        if not _test_stationarity(series):
            logger.warning(
                "At least one component of the target series is not stationary. "
                "Beware of wrong interpretation of the chosen explainability."
            )

    if input_type not in ["background", "foreground"]:
        raise_log(
            ValueError(
                f"Unknown `input_type='{input_type}'`. Must be one of ['background', 'foreground']."
            ),
            logger,
        )

    if past_covariates is not None:
        raise_if_not(
            len(series) == len(past_covariates),
            f"The number of {input_type} series and past covariates must be the same.",
            logger,
        )

    if future_covariates is not None:
        raise_if_not(
            len(series) == len(future_covariates),
            f"The number of {input_type} series and future covariates must be the same.",
            logger,
        )

    if requires_input:
        raise_if(
            model.uses_past_covariates and past_covariates is None,
            f"A {input_type} past covariates is not provided, but the model requires past covariates.",
            logger,
        )
        raise_if(
            model.uses_future_covariates and future_covariates is None,
            f"A {input_type} future covariates is not provided, but the model requires future covariates.",
            logger,
        )

    if not check_component_names:
        return

    # ensure we have the same names between TimeSeries (if list of). Important to ensure homogeneity
    # for explained features.
    for idx, target in enumerate(series):
        names_match = target.components.tolist() == target_components
        if past_covariates is not None:
            names_match = names_match and (
                past_covariates[idx].components.tolist() == past_covariates_components
            )
        if future_covariates is not None:
            names_match = names_match and (
                future_covariates[idx].components.tolist()
                == future_covariates_components
            )
        raise_if_not(
            names_match,
            "Columns names must be identical between TimeSeries list components (multi-TimeSeries).",
            logger,
        )


def _test_stationarity(series: Union[TimeSeries, Sequence[TimeSeries]]):
    """Return `True` only if `stationarity_tests` passes for every component of every series.

    Each component is tested on its own (`ts[component]`). Evaluation stops at the first component
    that is not stationary.
    """
    # accept a single `TimeSeries` as well, as advertised by the type annotation
    series = series2seq(series)
    # NOTE: this must be a flat generator of test results. Collecting per-series generator objects
    # in a list (as done previously) makes `all()` always return `True`, since generators are truthy.
    return all(stationarity_tests(ts[c]) for ts in series for c in ts.components)