# Source code for darts.utils.timeseries_generation

"""
Utils for time series generation
--------------------------------
"""

import math
from typing import List, Optional, Sequence, Tuple, Union

import holidays
import numpy as np
import pandas as pd

from darts import TimeSeries
from darts.logging import get_logger, raise_if, raise_if_not, raise_log
from darts.utils.utils import generate_index

# Module-level logger handed to the `raise_if` / `raise_if_not` / `raise_log` helpers in this file.
logger = get_logger(__name__)

# Names of datetime attributes whose values start at 1 rather than 0.
# `datetime_attribute_timeseries` subtracts 1 from the values of these attributes
# so that every encoding it produces is 0-indexed.
ONE_INDEXED_FREQS = {
    "day",
    "month",
    "quarter",
    "dayofyear",
    "day_of_year",
    "week",
    "weekofyear",
    "week_of_year",
}


def constant_timeseries(
    value: float = 1,
    start: Optional[Union[pd.Timestamp, int]] = pd.Timestamp("2000-01-01"),
    end: Optional[Union[pd.Timestamp, int]] = None,
    length: Optional[int] = None,
    freq: Union[str, int] = None,
    column_name: Optional[str] = "constant",
    dtype: np.dtype = np.float64,
) -> TimeSeries:
    """
    Build a univariate TimeSeries that holds the same value at every time step.

    Parameters
    ----------
    value
        The value assigned to every entry of the series.
    start
        First entry of the index. A pandas Timestamp yields a DatetimeIndex, an integer yields a
        RangeIndex. Must be combined with exactly one of `length` or `end`.
    end
        Optionally, the last entry of the index. Must be combined with exactly one of `start` or
        `length`. When `start` is given, `end` must have the same type as `start`.
    length
        Optionally, the number of entries of the index. Must be combined with exactly one of
        `start` or `end`.
    freq
        Spacing between two consecutive index entries. For a timestamp `start` this is a pandas
        DateOffset alias (see the `pandas docs
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`_),
        defaulting to "D" (daily). For an integer `start` it is the step of the RangeIndex,
        defaulting to 1.
    column_name
        Optionally, the name of the single component of the returned series.
    dtype
        NumPy dtype of the values (np.float32 or np.float64).

    Returns
    -------
    TimeSeries
        A series whose values all equal `value`.
    """
    time_axis = generate_index(start=start, end=end, freq=freq, length=length)
    n_steps = len(time_axis)

    # one copy of `value` per time step
    constant_values = np.full(n_steps, value, dtype=dtype)
    component_names = pd.Index([column_name])

    return TimeSeries.from_times_and_values(
        time_axis, constant_values, freq=freq, columns=component_names
    )
def linear_timeseries(
    start_value: float = 0,
    end_value: float = 1,
    start: Optional[Union[pd.Timestamp, int]] = pd.Timestamp("2000-01-01"),
    end: Optional[Union[pd.Timestamp, int]] = None,
    length: Optional[int] = None,
    freq: Union[str, int] = None,
    column_name: Optional[str] = "linear",
    dtype: np.dtype = np.float64,
) -> TimeSeries:
    """
    Build a univariate TimeSeries that goes linearly from `start_value` to `end_value`.

    The first entry equals `start_value`, the last entry equals `end_value`, and consecutive
    entries differ by (`end_value` - `start_value`) / (number of entries - 1).

    Parameters
    ----------
    start_value
        Value of the first entry of the series.
    end_value
        Value of the last entry of the series.
    start
        First entry of the index. A pandas Timestamp yields a DatetimeIndex, an integer yields a
        RangeIndex. Must be combined with exactly one of `length` or `end`.
    end
        Optionally, the last entry of the index. Must be combined with exactly one of `start` or
        `length`. When `start` is given, `end` must have the same type as `start`.
    length
        Optionally, the number of entries of the index. Must be combined with exactly one of
        `start` or `end`.
    freq
        Spacing between two consecutive index entries. For a timestamp `start` this is a pandas
        DateOffset alias (see the `pandas docs
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`_),
        defaulting to "D" (daily). For an integer `start` it is the step of the RangeIndex,
        defaulting to 1.
    column_name
        Optionally, the name of the single component of the returned series.
    dtype
        NumPy dtype of the values (np.float32 or np.float64).

    Returns
    -------
    TimeSeries
        A linearly evolving series as described above.
    """
    time_axis = generate_index(start=start, end=end, freq=freq, length=length)
    n_steps = len(time_axis)

    # evenly spaced values, both end points included
    ramp = np.linspace(start_value, end_value, num=n_steps, dtype=dtype)
    component_names = pd.Index([column_name])

    return TimeSeries.from_times_and_values(
        time_axis, ramp, freq=freq, columns=component_names
    )
def sine_timeseries(
    value_frequency: float = 0.1,
    value_amplitude: float = 1.0,
    value_phase: float = 0.0,
    value_y_offset: float = 0.0,
    start: Optional[Union[pd.Timestamp, int]] = pd.Timestamp("2000-01-01"),
    end: Optional[Union[pd.Timestamp, int]] = None,
    length: Optional[int] = None,
    freq: Union[str, int] = None,
    column_name: Optional[str] = "sine",
    dtype: np.dtype = np.float64,
) -> TimeSeries:
    """
    Creates a univariate TimeSeries with a sinusoidal value progression with a given frequency,
    amplitude, phase and y offset.

    The value at step ``t`` (``t = 0, 1, ...``) is
    ``value_amplitude * sin(2 * pi * value_frequency * t + value_phase) + value_y_offset``.

    Parameters
    ----------
    value_frequency
        The number of periods that take place within one time unit given in `freq`.
    value_amplitude
        The maximum difference between any value of the returned TimeSeries and `value_y_offset`.
    value_phase
        The relative position within one period of the first value of the returned TimeSeries
        (in radians).
    value_y_offset
        The shift of the sine function along the y axis.
    start
        The start of the returned TimeSeries' index. If a pandas Timestamp is passed, the TimeSeries
        will have a pandas DatetimeIndex. If an integer is passed, the TimeSeries will have a pandas
        RangeIndex index. Works only with either `length` or `end`.
    end
        Optionally, the end of the returned index. Works only with either `start` or `length`.
        If `start` is set, `end` must be of same type as `start`. Else, it can be either a pandas
        Timestamp or an integer.
    length
        Optionally, the length of the returned index. Works only with either `start` or `end`.
    freq
        The time difference between two adjacent entries in the returned index. In case `start` is
        a timestamp, a DateOffset alias is expected; see
        `docs <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`_.
        By default, "D" (daily) is used.
        If `start` is an integer, `freq` will be interpreted as the step size in the underlying
        RangeIndex. The freq is optional for generating an integer index (if not specified, 1 is used).
    column_name
        Optionally, the name of the value column for the returned TimeSeries
    dtype
        The desired NumPy dtype (np.float32 or np.float64) for the resulting series

    Returns
    -------
    TimeSeries
        A sinusoidal TimeSeries parametrized as indicated above.
    """
    index = generate_index(start=start, end=end, freq=freq, length=length)

    # Evaluate the sine in float64 and cast once at the end: the previous
    # `np.vectorize` over a Python-float lambda always produced float64 values,
    # so a requested `dtype` such as np.float32 was silently ignored.
    steps = np.arange(len(index), dtype=np.float64)
    values = (
        value_amplitude * np.sin(2 * np.pi * value_frequency * steps + value_phase)
        + value_y_offset
    ).astype(dtype)

    return TimeSeries.from_times_and_values(
        index, values, freq=freq, columns=pd.Index([column_name])
    )
def gaussian_timeseries(
    mean: Union[float, np.ndarray] = 0.0,
    std: Union[float, np.ndarray] = 1.0,
    start: Optional[Union[pd.Timestamp, int]] = pd.Timestamp("2000-01-01"),
    end: Optional[Union[pd.Timestamp, int]] = None,
    length: Optional[int] = None,
    freq: Union[str, int] = None,
    column_name: Optional[str] = "gaussian",
    dtype: np.dtype = np.float64,
) -> TimeSeries:
    """
    Creates a gaussian univariate TimeSeries by sampling all the series values independently,
    from a gaussian distribution with mean `mean` and standard deviation `std`.

    Parameters
    ----------
    mean
        The mean of the gaussian distribution that is sampled at each step.
        If a float value is given, the same mean is used at every step.
        If a numpy.ndarray of floats with one entry per time step is given, a different mean is
        used at each time step.
    std
        The standard deviation of the gaussian distribution that is sampled at each step.
        If a float value is given, the same standard deviation is used at every step.
        If an array of dimension `(n, n)` is given (with `n` the number of time steps), it will be
        used as covariance matrix for a multivariate gaussian distribution.
    start
        The start of the returned TimeSeries' index. If a pandas Timestamp is passed, the TimeSeries
        will have a pandas DatetimeIndex. If an integer is passed, the TimeSeries will have a pandas
        RangeIndex index. Works only with either `length` or `end`.
    end
        Optionally, the end of the returned index. Works only with either `start` or `length`.
        If `start` is set, `end` must be of same type as `start`. Else, it can be either a pandas
        Timestamp or an integer.
    length
        Optionally, the length of the returned index. Works only with either `start` or `end`.
    freq
        The time difference between two adjacent entries in the returned index. In case `start` is
        a timestamp, a DateOffset alias is expected; see
        `docs <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`_.
        By default, "D" (daily) is used.
        If `start` is an integer, `freq` will be interpreted as the step size in the underlying
        RangeIndex. The freq is optional for generating an integer index (if not specified, 1 is used).
    column_name
        Optionally, the name of the value column for the returned TimeSeries
    dtype
        The desired NumPy dtype (np.float32 or np.float64) for the resulting series

    Returns
    -------
    TimeSeries
        A white noise TimeSeries created as indicated above.

    Raises
    ------
    ValueError
        Presumably raised by `raise_if_not` when the shape of an array-valued `mean` or `std`
        does not match the number of time steps (the helper is defined outside this module).
    """
    # Build the index first: `length` is None when the index is described by
    # `start` and `end`, so shapes must be checked against the real step count.
    index = generate_index(start=start, end=end, freq=freq, length=length)
    n_steps = len(index)

    if isinstance(mean, np.ndarray):
        raise_if_not(
            mean.shape == (n_steps,),
            "If a vector of means is provided, "
            "it requires the same length as the TimeSeries.",
            logger,
        )

    if isinstance(std, np.ndarray):
        raise_if_not(
            std.shape == (n_steps, n_steps),
            "If a matrix of standard deviations is provided, "
            "its shape has to match the length of the TimeSeries.",
            logger,
        )
        # A square matrix is a covariance matrix: `np.random.normal` cannot
        # broadcast it to `size=n_steps`, so draw one joint multivariate sample.
        mean_vector = (
            mean
            if isinstance(mean, np.ndarray)
            else np.full(n_steps, mean, dtype=np.float64)
        )
        values = np.random.multivariate_normal(mean_vector, std)
    else:
        values = np.random.normal(mean, std, size=n_steps)

    values = values.astype(dtype)

    return TimeSeries.from_times_and_values(
        index, values, freq=freq, columns=pd.Index([column_name])
    )
def random_walk_timeseries(
    mean: float = 0.0,
    std: float = 1.0,
    start: Optional[Union[pd.Timestamp, int]] = pd.Timestamp("2000-01-01"),
    end: Optional[Union[pd.Timestamp, int]] = None,
    length: Optional[int] = None,
    freq: Union[str, int] = None,
    column_name: Optional[str] = "random_walk",
    dtype: np.dtype = np.float64,
) -> TimeSeries:
    """
    Build a univariate random walk: the running sum of independent gaussian steps.

    Parameters
    ----------
    mean
        Mean of the gaussian distribution each step is drawn from.
    std
        Standard deviation of the gaussian distribution each step is drawn from.
    start
        First entry of the index. A pandas Timestamp yields a DatetimeIndex, an integer yields a
        RangeIndex. Must be combined with exactly one of `length` or `end`.
    end
        Optionally, the last entry of the index. Must be combined with exactly one of `start` or
        `length`. When `start` is given, `end` must have the same type as `start`.
    length
        Optionally, the number of entries of the index. Must be combined with exactly one of
        `start` or `end`.
    freq
        Spacing between two consecutive index entries. For a timestamp `start` this is a pandas
        DateOffset alias (see the `pandas docs
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`_),
        defaulting to "D" (daily). For an integer `start` it is the step of the RangeIndex,
        defaulting to 1.
    column_name
        Optionally, the name of the single component of the returned series.
    dtype
        NumPy dtype of the values (np.float32 or np.float64).

    Returns
    -------
    TimeSeries
        A random walk series as described above.
    """
    time_axis = generate_index(start=start, end=end, freq=freq, length=length)

    # draw one gaussian increment per time step, then accumulate them
    increments = np.random.normal(mean, std, size=len(time_axis))
    walk = np.cumsum(increments, dtype=dtype)

    component_names = pd.Index([column_name])
    return TimeSeries.from_times_and_values(
        time_axis, walk, freq=freq, columns=component_names
    )
def autoregressive_timeseries(
    coef: Sequence[float],
    start_values: Optional[Sequence[float]] = None,
    start: Optional[Union[pd.Timestamp, int]] = pd.Timestamp("2000-01-01"),
    end: Optional[Union[pd.Timestamp, int]] = None,
    length: Optional[int] = None,
    freq: Union[str, int] = None,
    column_name: Optional[str] = "autoregressive",
) -> TimeSeries:
    """
    Build a univariate autoregressive TimeSeries from coefficients `coef` and seed values
    `start_values`.

    Parameters
    ----------
    coef
        The autoregressive coefficients. Each new value is
        series[t] = coef[-1] * series[t-1] + coef[-2] * series[t-2] + ... + coef[0] * series[t-len(coef)]
    start_values
        Seed values standing in for the lags that do not exist yet at the beginning, i.e.
        series[0] = coef[-1] * start_values[-1] + coef[-2] * start_values[-2] + ... + coef[0] * start_values[0].
        Must have the same length as `coef`. Defaults to all ones.
    start
        First entry of the index. A pandas Timestamp yields a DatetimeIndex, an integer yields a
        RangeIndex. Must be combined with exactly one of `length` or `end`.
    end
        Optionally, the last entry of the index. Must be combined with exactly one of `start` or
        `length`. When `start` is given, `end` must have the same type as `start`.
    length
        Optionally, the number of entries of the index. Must be combined with exactly one of
        `start` or `end`.
    freq
        Spacing between two consecutive index entries. For a timestamp `start` this is a pandas
        DateOffset alias (see the `pandas docs
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`_),
        defaulting to "D" (daily). For an integer `start` it is the step of the RangeIndex,
        defaulting to 1.
    column_name
        Optionally, the name of the single component of the returned series.

    Returns
    -------
    TimeSeries
        An autoregressive series as described above; the seed values are not part of it.
    """
    order = len(coef)

    if start_values is not None:
        raise_if_not(
            len(start_values) == len(coef),
            "start_values must have same length as coef.",
        )
    else:
        # no seed given: fall back to a vector of ones
        start_values = np.ones(order)

    time_axis = generate_index(start=start, end=end, freq=freq, length=length)
    total = order + len(time_axis)

    # buffer layout: [seed values | generated values]
    buffer = np.empty(total)
    buffer[:order] = start_values

    for t in range(order, total):
        # next value = dot product of the coefficients with the `order` preceding values
        window = buffer[t - order : t]
        buffer[t] = np.dot(window, coef)

    generated = buffer[order:]
    return TimeSeries.from_times_and_values(
        time_axis, generated, freq=freq, columns=pd.Index([column_name])
    )
def _extend_time_index_until(
    time_index: Union[pd.DatetimeIndex, pd.RangeIndex],
    until: Optional[Union[int, str, pd.Timestamp]],
    add_length: int,
) -> pd.DatetimeIndex:
    """
    Extend `time_index` past its last entry, either by a number of steps or up to a given point.

    Parameters
    ----------
    time_index
        The index to extend. Its `freq` attribute is used as the step.
    until
        Optionally, the new last entry of the index. It must lie strictly after the current end
        and be a whole number of `freq` steps away from it.
    add_length
        Optionally, the number of steps to append. Set only one of `until` and `add_length`.

    Returns
    -------
    pd.DatetimeIndex
        `time_index` itself when neither `until` nor `add_length` is set (both falsy), otherwise
        a new index from `time_index[0]` to the new end, built with `pd.date_range`.

    Notes
    -----
    NOTE(review): the `until` validation has a branch for integer-indexed series, but the step
    check compares against `pd.Timedelta(0)` and the result is built with `pd.date_range`, so
    only a `pd.DatetimeIndex` looks supported in practice - confirm against callers.
    """
    # nothing to extend
    if not add_length and not until:
        return time_index

    raise_if(bool(add_length) and bool(until), "set only one of add_length and until")

    end = time_index[-1]
    freq = time_index.freq

    if add_length:
        raise_if_not(
            add_length >= 0,
            f"Expected add_length, by which to extend the time series by, "
            f"to be positive, got {add_length}",
        )

        try:
            end += add_length * freq
        except pd.errors.OutOfBoundsDatetime:
            raise_log(
                ValueError(
                    f"the add operation between {end} and {add_length * freq} will overflow"
                ),
                logger,
            )
    else:
        datetime_index = isinstance(time_index, pd.DatetimeIndex)

        # the messages report the type of the offending `until` argument
        # (they previously reported the type of the index end instead)
        if datetime_index:
            raise_if_not(
                isinstance(until, (str, pd.Timestamp)),
                "Expected valid timestamp for TimeSeries, "
                "indexed by DatetimeIndex, "
                f"for parameter until, got {type(until)}",
                logger,
            )
        else:
            raise_if_not(
                isinstance(until, int),
                "Expected integer for TimeSeries, indexed by RangeIndex, "
                f"for parameter until, got {type(until)}",
                logger,
            )

        timestamp = pd.Timestamp(until) if datetime_index else until

        raise_if_not(
            timestamp > end,
            f"Expected until, {timestamp} to lie past end of time index {end}",
        )

        # `until` has to fall exactly on the grid defined by `freq`
        ahead = timestamp - end
        raise_if_not(
            (ahead % freq) == pd.Timedelta(0),
            f"End date must correspond with frequency {freq} of the time axis",
            logger,
        )

        end = timestamp

    new_time_index = pd.date_range(start=time_index[0], end=end, freq=freq)
    return new_time_index
def holidays_timeseries(
    time_index: Union[TimeSeries, pd.DatetimeIndex],
    country_code: str,
    prov: str = None,
    state: str = None,
    column_name: Optional[str] = "holidays",
    until: Optional[Union[int, str, pd.Timestamp]] = None,
    add_length: int = 0,
    dtype: np.dtype = np.float64,
    tz: Optional[str] = None,
) -> TimeSeries:
    """
    Creates a binary univariate TimeSeries with index `time_index` that equals 1 at every index
    that lies within (or equals) a selected country's holiday, and 0 otherwise.

    Available countries can be found `here
    <https://github.com/dr-prodigy/python-holidays#available-countries>`_.

    Parameters
    ----------
    time_index
        Either a `pd.DatetimeIndex` or a `TimeSeries` for which to generate the holidays.
    country_code
        The country ISO code.
    prov
        The province.
    state
        The state.
    column_name
        Optionally, the name of the value column for the returned TimeSeries.
    until
        Extend the time_index up until timestamp for datetime indexed series
        and int for range indexed series, should match or exceed forecasting window.
    add_length
        Extend the time_index by add_length, should match or exceed forecasting window.
        Set only one of until and add_length.
    dtype
        The desired NumPy dtype (np.float32 or np.float64) for the resulting series.
    tz
        Optionally, a time zone to convert the time index to before generating the holidays.

    Returns
    -------
    TimeSeries
        A new binary holiday TimeSeries instance.
    """
    # `time_index_ts` indexes the returned series, `time_index` (possibly
    # converted to `tz`) is the one the holiday lookup is performed on
    time_index_ts, time_index = _process_time_index(
        time_index=time_index,
        tz=tz,
        until=until,
        add_length=add_length,
    )

    # `range` excludes its end, so add 1 to cover the last year of the index
    # (the previous bound left it out, and was empty for a single-year index)
    scope = range(time_index[0].year, time_index[-1].year + 1)
    country_holidays = holidays.country_holidays(
        country_code, prov=prov, state=state, years=scope
    )

    index_series = pd.Series(time_index, index=time_index)
    values = index_series.apply(lambda x: x in country_holidays).astype(dtype)

    return TimeSeries.from_times_and_values(
        time_index_ts, values, columns=pd.Index([column_name])
    )
def datetime_attribute_timeseries(
    time_index: Union[pd.DatetimeIndex, TimeSeries],
    attribute: str,
    one_hot: bool = False,
    cyclic: bool = False,
    until: Optional[Union[int, str, pd.Timestamp]] = None,
    add_length: int = 0,
    dtype=np.float64,
    with_columns: Optional[Union[List[str], str]] = None,
    tz: Optional[str] = None,
) -> TimeSeries:
    """
    Returns a new TimeSeries with index `time_index` and one or more dimensions containing
    (optionally one-hot encoded or cyclic encoded) pd.DatetimeIndex attribute information derived
    from the index.

    1-indexed attributes are shifted to enforce 0-indexing across all the encodings.

    Parameters
    ----------
    time_index
        Either a `pd.DatetimeIndex` attribute which will serve as the basis of the new column(s),
        or a `TimeSeries` whose time axis will serve this purpose.
    attribute
        An attribute of `pd.DatetimeIndex`, or `week` / `weekofyear` / `week_of_year` - e.g.
        "month", "weekday", "day", "hour", "minute", "second". See all available attributes in
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DatetimeIndex.html#pandas.DatetimeIndex.
    one_hot
        Boolean value indicating whether to add the specified attribute as a one hot encoding
        (results in more columns).
    cyclic
        Boolean value indicating whether to add the specified attribute as a cyclic encoding.
        Alternative to one_hot encoding, enable only one of the two.
        (adds 2 columns, corresponding to sin and cos transformation)
    until
        Extend the time_index up until timestamp for datetime indexed series
        and int for range indexed series, should match or exceed forecasting window.
    add_length
        Extend the time_index by add_length, should match or exceed forecasting window.
        Set only one of until and add_length.
    dtype
        The desired NumPy dtype (np.float32 or np.float64) for the resulting series
    with_columns
        Optionally, specify the output component names.

        * If `one_hot` and `cyclic` are ``False``, must be a string
        * If `cyclic` is ``True``, must be a list of two strings. The first string for the sine,
          the second for the cosine component name.
        * If `one_hot` is ``True``, must be a list of strings of the same length as the generated
          one hot encoded features.
    tz
        Optionally, a time zone to convert the time index to before computing the attributes.

    Returns
    -------
    TimeSeries
        New datetime attribute TimeSeries instance.
    """
    # `time_index_ts` indexes the returned series, `time_index` (possibly
    # converted to `tz`) is the one the attribute values are read from
    time_index_ts, time_index = _process_time_index(
        time_index=time_index,
        tz=tz,
        until=until,
        add_length=add_length,
    )

    raise_if_not(
        hasattr(pd.DatetimeIndex, attribute)
        or (attribute in ["week", "weekofyear", "week_of_year"]),
        f"attribute `{attribute}` needs to be an attribute of pd.DatetimeIndex. "
        "See all available attributes in "
        "https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DatetimeIndex.html#pandas.DatetimeIndex",
        logger,
    )

    raise_if(one_hot and cyclic, "set only one of one_hot or cyclic to true", logger)

    # number of distinct values each supported attribute can take;
    # used as the number of one-hot columns and as the period of the cyclic encoding
    num_values_dict = {
        "month": 12,
        "day": 31,
        "weekday": 7,
        "dayofweek": 7,
        "day_of_week": 7,
        "hour": 24,
        "minute": 60,
        "second": 60,
        "microsecond": 1000000,
        "nanosecond": 1000,
        "quarter": 4,
        "dayofyear": 365,
        "day_of_year": 365,
        "week": 52,
        "weekofyear": 52,
        "week_of_year": 52,
    }

    if attribute not in ["week", "weekofyear", "week_of_year"]:
        values = getattr(time_index, attribute)
    else:
        # the week number is read from the ISO calendar of the index
        values = (
            time_index.isocalendar()
            .set_index("week")
            .index.astype("int64")
            .rename("time")
        )

    # shift 1-indexed datetime attributes
    if attribute in ONE_INDEXED_FREQS:
        values -= 1

    # leap years insert an additional day on the 29th of February
    if attribute in {"dayofyear", "day_of_year"} and any(time_index.is_leap_year):
        num_values_dict[attribute] += 1

    # ISO years contain an additional (53rd) week if they are:
    # - any year starting on a Thursday (leap or not)
    # - a leap year starting on a Wednesday
    if attribute in {"week", "weekofyear", "week_of_year"}:
        years = time_index.year.unique()
        # check if any year of the index has 53 weeks; the previous condition
        # required Thursday years to be non-leap and so missed e.g. 2004
        additional_week_year = any(
            first_day.day_name() == "Thursday"
            or (first_day.is_leap_year and first_day.day_name() == "Wednesday")
            for first_day in [pd.Timestamp(f"{year}-01-01") for year in years]
        )
        # check if time index actually include the additional week
        additional_week_in_index = time_index[-1] - time_index[0] + pd.Timedelta(
            days=1
        ) >= pd.Timedelta(days=365)

        if additional_week_year and additional_week_in_index:
            num_values_dict[attribute] += 1

    if one_hot or cyclic:
        raise_if_not(
            attribute in num_values_dict,
            f"Given datetime attribute `{attribute}` not supported with one-hot or cyclical encoding. "
            f"Supported datetime attribute: {list(num_values_dict.keys())}",
            logger,
        )

    if one_hot:
        values_df = pd.get_dummies(values)
        # fill missing columns (in case not all values appear in time_index)
        attribute_range = range(num_values_dict[attribute])
        for i in attribute_range:
            if i not in values_df.columns:
                values_df[i] = 0
        # keep the columns in ascending attribute order
        values_df = values_df[attribute_range]

        if with_columns is None:
            with_columns = [
                attribute + "_" + str(column_name) for column_name in values_df.columns
            ]

        # report the expected number of names (the message previously printed
        # the whole column index instead of its length)
        raise_if_not(
            len(with_columns) == len(values_df.columns),
            "For the given case with `one_hot=True`,`with_columns` must be a list of strings of length "
            f"{len(values_df.columns)}.",
            logger=logger,
        )

        values_df.columns = with_columns
    else:
        if cyclic:
            if attribute == "day":
                # the period of the day-of-month cycle depends on the month of each entry
                periods = time_index.days_in_month.values
                freq = 2 * np.pi * np.reciprocal(periods.astype(dtype))
            else:
                period = num_values_dict[attribute]
                freq = 2 * np.pi / period

            if with_columns is None:
                with_columns = [attribute + "_sin", attribute + "_cos"]

            raise_if(
                len(with_columns) != 2,
                "`with_columns` must be a list of two strings when `cyclic=True`. "
                "The first string for the sine component name, the second for the cosine component name.",
                logger=logger,
            )
            values_df = pd.DataFrame(
                {
                    with_columns[0]: np.sin(freq * values),
                    with_columns[1]: np.cos(freq * values),
                }
            )
        else:
            if with_columns is None:
                with_columns = attribute
            raise_if_not(
                isinstance(with_columns, str),
                "`with_columns` must be a string specifying the output component name.",
                logger=logger,
            )
            values_df = pd.DataFrame({with_columns: values})

    values_df.index = time_index_ts
    return TimeSeries.from_dataframe(values_df).astype(dtype)
def _build_forecast_series(
    points_preds: Union[np.ndarray, Sequence[np.ndarray]],
    input_series: TimeSeries,
    custom_columns: List[str] = None,
    with_static_covs: bool = True,
    with_hierarchy: bool = True,
    pred_start: Optional[Union[pd.Timestamp, int]] = None,
) -> TimeSeries:
    """
    Wrap forecasted values into a TimeSeries whose index follows the input series.

    The index starts one step after the end of `input_series`, or at `pred_start` when given.

    Parameters
    ----------
    points_preds
        Forecasted values, can be either the target(s) or parameters of the likelihood model.
        A sequence of arrays is stacked along a new axis 2.
    input_series
        TimeSeries used as input for the prediction
    custom_columns
        New names for the forecast TimeSeries, used when the number of components changes
    with_static_covs
        If set to False, do not copy the input_series `static_covariates` attribute
    with_hierarchy
        If set to False, do not copy the input_series `hierarchy` attribute
    pred_start
        Optionally, give a custom prediction start point.

    Returns
    -------
    TimeSeries
        New TimeSeries instance holding the forecasted values
    """
    single_array = isinstance(points_preds, np.ndarray)

    # number of forecasted steps = length of the first axis of the prediction array(s)
    n_steps = len(points_preds) if single_array else len(points_preds[0])
    forecast_index = _generate_new_dates(
        n_steps,
        input_series=input_series,
        start=pred_start,
    )

    if single_array:
        forecast_values = points_preds
    else:
        forecast_values = np.stack(points_preds, axis=2)

    freq_str = input_series.freq_str
    component_names = custom_columns
    if custom_columns is None:
        component_names = input_series.columns
    static_covs = input_series.static_covariates if with_static_covs else None
    hierarchy = input_series.hierarchy if with_hierarchy else None

    return TimeSeries.from_times_and_values(
        forecast_index,
        forecast_values,
        freq=freq_str,
        columns=component_names,
        static_covariates=static_covs,
        hierarchy=hierarchy,
    )


def _generate_new_dates(
    n: int, input_series: TimeSeries, start: Optional[Union[pd.Timestamp, int]] = None
) -> Union[pd.DatetimeIndex, pd.RangeIndex]:
    """
    Generates an index of `n` entries with the frequency and time dimension name of
    `input_series`, starting at `start` or, if `start` is None, one step after the end of
    `input_series`.
    """
    first_new = start
    if first_new is None:
        first_new = input_series.end_time() + input_series.freq

    return generate_index(
        start=first_new,
        freq=input_series.freq,
        length=n,
        name=input_series.time_dim,
    )


def _process_time_index(
    time_index: Union[TimeSeries, pd.DatetimeIndex],
    tz: Optional[str] = None,
    until: Optional[Union[int, str, pd.Timestamp]] = None,
    add_length: int = 0,
) -> Tuple[pd.DatetimeIndex, pd.DatetimeIndex]:
    """
    Extracts the time index, optionally appends time steps after its end, and optionally
    converts it to another time zone.

    Returns a tuple of two pd.DatetimeIndex: first the time zone naive index meant for building
    a new TimeSeries, then the index (converted to `tz` if given) meant for computing datetime
    attributes and holidays.
    """
    naive_index = time_index.time_index if isinstance(time_index, TimeSeries) else time_index

    if not isinstance(naive_index, pd.DatetimeIndex):
        raise_log(
            ValueError(
                "`time_index` must be a pandas `DatetimeIndex` or a `TimeSeries` indexed with a `DatetimeIndex`."
            ),
            logger=logger,
        )
    if naive_index.tz is not None:
        raise_log(
            ValueError("`time_index` must be time zone naive."),
            logger=logger,
        )

    naive_index = _extend_time_index_until(naive_index, until, add_length)

    if tz is None:
        return naive_index, naive_index

    # treat the naive index as UTC, then express it in the requested time zone
    converted_index = naive_index.tz_localize("UTC").tz_convert(tz)
    return naive_index, converted_index