DeepTCN model¶
This is an example of probabilistic forecasting using a TCN model that is very close to the DeepTCN architecture described in https://arxiv.org/abs/1906.04397
[1]:
from utils import fix_pythonpath_if_working_locally
fix_pythonpath_if_working_locally()
%matplotlib inline
[2]:
import pandas as pd
from darts.models import TCNModel
from darts.utils.callbacks import TFMProgressBar
import darts.utils.timeseries_generation as tg
from darts.utils.likelihood_models import GaussianLikelihood, QuantileRegression
from darts.datasets import EnergyDataset
from darts.utils.missing_values import fill_missing_values
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts import concatenate
import warnings
warnings.filterwarnings("ignore")
import logging
logging.disable(logging.CRITICAL)
import matplotlib.pyplot as plt
def generate_torch_kwargs():
    # run torch models on CPU, and disable progress bars for all model stages except training.
    return {
        "pl_trainer_kwargs": {
            "accelerator": "cpu",
            "callbacks": [TFMProgressBar(enable_train_bar_only=True)],
        }
    }
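If a GPU is available, the same helper could be adapted. The sketch below is not from the original notebook; it assumes a single CUDA device and uses standard PyTorch Lightning trainer arguments passed through pl_trainer_kwargs.
[ ]:
def generate_torch_kwargs_gpu():
    # hypothetical variant: run torch models on the first GPU, keeping the reduced progress bars
    return {
        "pl_trainer_kwargs": {
            "accelerator": "gpu",
            "devices": [0],
            "callbacks": [TFMProgressBar(enable_train_bar_only=True)],
        }
    }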
Variable noise series¶
[3]:
length = 400
trend = tg.linear_timeseries(length=length, end_value=4)
season1 = tg.sine_timeseries(length=length, value_frequency=0.05, value_amplitude=1.0)
noise = tg.gaussian_timeseries(length=length, std=0.6)
noise_modulator = (
    tg.sine_timeseries(length=length, value_frequency=0.02)
    + tg.constant_timeseries(length=length, value=1)
) / 2
noise = noise * noise_modulator
target_series = sum([noise, season1])
covariates = noise_modulator
target_train, target_val = target_series.split_after(0.65)
plt.figure(figsize=(10, 3))
target_train.plot()
target_val.plot()
[3]:
<Axes: xlabel='time'>
[4]:
deeptcn = TCNModel(
    input_chunk_length=30,
    output_chunk_length=20,
    kernel_size=2,
    num_filters=4,
    dilation_base=2,
    dropout=0,
    random_state=0,
    likelihood=GaussianLikelihood(),
    **generate_torch_kwargs(),
)
deeptcn.fit(target_train, past_covariates=covariates)
[4]:
TCNModel(kernel_size=2, num_filters=4, num_layers=None, dilation_base=2, weight_norm=False, dropout=0, input_chunk_length=30, output_chunk_length=20, random_state=0, likelihood=GaussianLikelihood(prior_mu=None, prior_sigma=None, prior_strength=1.0, beta_nll=0.0), pl_trainer_kwargs={'accelerator': 'cpu', 'callbacks': [<darts.utils.callbacks.TFMProgressBar object at 0x2a9ef6470>]})
[5]:
pred = deeptcn.predict(80, past_covariates=covariates, num_samples=100)
target_val.slice_intersect(pred).plot(label="target")
pred.plot(label="forecast")
[5]:
<Axes: xlabel='time'>
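As a quick sanity check (not part of the original notebook), the sampled forecast can be reduced to its median and scored against the validation series; the sketch below assumes darts' mae metric and TimeSeries.quantile_timeseries.
[ ]:
from darts.metrics import mae
# reduce the 100 samples to their median to obtain a deterministic forecast
pred_median = pred.quantile_timeseries(0.5)
# mean absolute error against the overlapping part of the validation series
print(f"MAE of the median forecast: {mae(target_val, pred_median):.3f}")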
Daily energy production¶
[6]:
df3 = EnergyDataset().load().pd_dataframe()
df3_day_avg = (
    df3.groupby(df3.index.astype(str).str.split(" ").str[0]).mean().reset_index()
)
series_en = fill_missing_values(
    TimeSeries.from_dataframe(
        df3_day_avg, "time", ["generation hydro run-of-river and poundage"]
    ),
    "auto",
)
# scale
scaler_en = Scaler()
train_en, val_en = series_en.split_after(pd.Timestamp("20170901"))
train_en_transformed = scaler_en.fit_transform(train_en)
val_en_transformed = scaler_en.transform(val_en)
series_en_transformed = scaler_en.transform(series_en)
# add the day as a covariate (no scaling required, as one-hot-encoded)
day_series = datetime_attribute_timeseries(
    series_en_transformed, attribute="day", one_hot=True
)
plt.figure(figsize=(10, 3))
train_en_transformed.plot(label="train")
val_en_transformed.plot(label="validation")
[6]:
<Axes: xlabel='time'>
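To sanity-check the one-hot encoding (this cell is illustrative and not from the original notebook), we can inspect how many binary components the covariate series contains and how they are named.
[ ]:
# one component per day of the month, each taking values 0/1
print(day_series.n_components)
print(list(day_series.components)[:5])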
[7]:
model_name = "TCN_test"
deeptcn = TCNModel(
    dropout=0.2,
    batch_size=32,
    n_epochs=50,
    optimizer_kwargs={"lr": 1e-3},
    random_state=0,
    input_chunk_length=300,
    output_chunk_length=30,
    kernel_size=3,
    num_filters=4,
    likelihood=QuantileRegression(),
    model_name=model_name,
    save_checkpoints=True,
    force_reset=True,
    **generate_torch_kwargs(),
)
deeptcn.fit(
    series=train_en_transformed,
    past_covariates=day_series,
    val_series=val_en_transformed,
    val_past_covariates=day_series,
)
[7]:
TCNModel(kernel_size=3, num_filters=4, num_layers=None, dilation_base=2, weight_norm=False, dropout=0.2, batch_size=32, n_epochs=50, optimizer_kwargs={'lr': 0.001}, random_state=0, input_chunk_length=300, output_chunk_length=30, likelihood=QuantileRegression(quantiles: Optional[List[float]] = None), model_name=TCN_test, save_checkpoints=True, force_reset=True, pl_trainer_kwargs={'accelerator': 'cpu', 'callbacks': [<darts.utils.callbacks.TFMProgressBar object at 0x2a9c067a0>]})
Let’s load the model from its best performing checkpoint (lowest validation loss):
[8]:
deeptcn = TCNModel.load_from_checkpoint(model_name=model_name, best=True)
Now we perform historical forecasts:
- we start the predictions at the beginning of the validation set (start=val_en_transformed.start_time())
- each prediction will have length forecast_horizon=30
- the next prediction will start stride=30 points ahead
- we keep all predicted values from each forecast (last_points_only=False)
- we continue until we run out of input data

In the end, we concatenate the historical forecasts to get a single time series that is continuous on the time axis.
[9]:
backtest_en = deeptcn.historical_forecasts(
    series=series_en_transformed,
    start=val_en_transformed.start_time(),
    past_covariates=day_series,
    num_samples=500,
    forecast_horizon=30,
    stride=30,
    last_points_only=False,
    retrain=False,
    verbose=True,
)
backtest_en = concatenate(backtest_en)
[10]:
plt.figure(figsize=(10, 6))
val_en_transformed.plot(label="actual")
backtest_en.plot(label="backtest q0.05 - q0.95", low_quantile=0.05, high_quantile=0.95)
backtest_en.plot(label="backtest q0.25 - q0.75", low_quantile=0.25, high_quantile=0.75)
plt.legend()
[10]:
<matplotlib.legend.Legend at 0x2acf4d4e0>
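As an optional last step (not from the original notebook), the probabilistic backtest can be reduced to its median and scored against the validation series; the sketch below assumes darts' mae metric and, if values on the original scale are needed, the Scaler's inverse_transform.
[ ]:
from darts.metrics import mae
# reduce the 500 sampled trajectories to their median and score them on the scaled axis
backtest_median = backtest_en.quantile_timeseries(0.5)
print(f"MAE of the median backtest (scaled units): {mae(val_en_transformed, backtest_median):.3f}")
# the forecasts can also be mapped back to the original scale of the energy series
backtest_original = scaler_en.inverse_transform(backtest_en)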