"""
Likelihood Models
-----------------
The likelihood models contain all the logic needed to train and use Darts' neural network models in
a probabilistic way. This essentially means computing an appropriate training loss and sampling from the
distribution, given the parameters of the distribution.
By default, all versions will be trained using their negative log likelihood as a loss function
(hence performing maximum likelihood estimation when training the model).
However, most likelihoods also optionally support specifying time-independent "prior"
beliefs about the distribution parameters.
In such cases, a KL-divergence term is added to the loss in order to regularise it in the
direction of the specified prior distribution. (Note that this is technically not purely
a Bayesian approach, as the priors are actual parameter values and not distributions).
The parameter `prior_strength` controls the strength of the "prior" regularisation on the loss.
Some distributions (such as ``GaussianLikelihood`` and ``PoissonLikelihood``) are univariate,
in which case they are applied to model each component of multivariate series independently.
Some other distributions (such as ``DirichletLikelihood``) are multivariate,
in which case they will model all components of multivariate time series jointly.
Univariate likelihoods accept either scalar or array-like values for the optional prior parameters.
If a scalar is provided, it is used as a prior for all components of the series. If an array-like is provided,
the i-th value will be used as a prior for the i-th component of the series. Multivariate likelihoods
require array-like objects when specifying priors.
The target series used for training must always lie within the distribution's support, otherwise
errors will be raised during training. You can refer to the individual likelihoods' documentation
to see what the support is. Similarly, the prior parameters also have to lie in some pre-defined domains.
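Example
-------
A minimal usage sketch (assuming a torch-based Darts forecasting model that accepts a
``likelihood`` argument, such as ``RNNModel``; exact import paths and model arguments may
differ between Darts versions)::

    from darts.models import RNNModel
    from darts.utils.likelihood_models import GaussianLikelihood

    # scalar priors: the same prior mu/sigma is applied to every component
    likelihood = GaussianLikelihood(prior_mu=0.0, prior_sigma=1.0, prior_strength=0.5)

    # array-like priors: one value per component of a 2-component target series
    # likelihood = GaussianLikelihood(prior_mu=[0.0, 10.0], prior_sigma=[1.0, 2.0])

    model = RNNModel(input_chunk_length=24, training_length=36, likelihood=likelihood)
    # model.fit(series) then model.predict(n=12, num_samples=100) yields probabilistic forecasts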
"""
import collections.abc
import inspect
from abc import ABC, abstractmethod
from typing import Optional, Union
import numpy as np
import torch
import torch.nn as nn
from torch.distributions import Bernoulli as _Bernoulli
from torch.distributions import Beta as _Beta
from torch.distributions import Cauchy as _Cauchy
from torch.distributions import ContinuousBernoulli as _ContinuousBernoulli
from torch.distributions import Dirichlet as _Dirichlet
from torch.distributions import Exponential as _Exponential
from torch.distributions import Gamma as _Gamma
from torch.distributions import Geometric as _Geometric
from torch.distributions import Gumbel as _Gumbel
from torch.distributions import HalfNormal as _HalfNormal
from torch.distributions import Laplace as _Laplace
from torch.distributions import LogNormal as _LogNormal
from torch.distributions import NegativeBinomial as _NegativeBinomial
from torch.distributions import Normal as _Normal
from torch.distributions import Poisson as _Poisson
from torch.distributions import Weibull as _Weibull
from torch.distributions.kl import kl_divergence
from darts import TimeSeries
from darts.logging import raise_if_not
# TODO: Table on README listing distribution, possible priors and wiki article
from darts.utils.utils import (
_check_quantiles,
likelihood_component_names,
quantile_names,
)
MIN_CAUCHY_GAMMA_SAMPLING = 1e-100
# Some utils for checking parameters' domains
def _check(param, predicate, param_name, condition_str):
if param is None:
return
if isinstance(param, (collections.abc.Sequence, np.ndarray)):
raise_if_not(
all(predicate(p) for p in param),
f"All provided parameters {param_name} must be {condition_str}.",
)
else:
raise_if_not(
predicate(param),
f"The parameter {param_name} must be {condition_str}.",
)
def _check_strict_positive(param, param_name=""):
_check(param, lambda p: p > 0, param_name, "strictly positive")
def _check_in_open_0_1_intvl(param, param_name=""):
_check(param, lambda p: 0 < p < 1, param_name, "in the open interval (0, 1)")
class Likelihood(ABC):
def __init__(self, prior_strength=1.0):
"""
Abstract class for a likelihood model.
"""
self.prior_strength = prior_strength
# used for equality operator between likelihood objects
self.ignore_attrs_equality = []
    def compute_loss(
self,
model_output: torch.Tensor,
target: torch.Tensor,
sample_weight: torch.Tensor,
):
"""
Computes a loss from a `model_output`, which represents the parameters of a given probability
distribution for every ground truth value in `target`, and the `target` itself.
"""
params_out = self._params_from_output(model_output)
loss = self._nllloss(params_out, target, sample_weight)
prior_params = self._prior_params
use_prior = prior_params is not None and any(
p is not None for p in prior_params
)
if use_prior:
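            # regularise towards the prior: total loss = NLL + prior_strength * mean(KL(prior || predicted))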
out_distr = self._distr_from_params(params_out)
device = params_out[0].device
prior_params = tuple(
# use model output as "prior" for parameters not specified as prior
(
torch.tensor(prior_params[i]).to(device)
if prior_params[i] is not None
else params_out[i]
)
for i in range(len(prior_params))
)
prior_distr = self._distr_from_params(prior_params)
# Loss regularization using the prior distribution
loss += self.prior_strength * torch.mean(
kl_divergence(prior_distr, out_distr)
)
return loss
def _nllloss(self, params_out, target, sample_weight):
"""
        This is the default way to compute the NLL loss. It can be overwritten by likelihoods for which
        PyTorch provides a numerically more stable NLL loss.
"""
out_distr = self._distr_from_params(params_out)
loss = -out_distr.log_prob(target)
if sample_weight is not None:
loss = loss * sample_weight
return loss.mean()
@property
def _prior_params(self):
"""
        Has to be overwritten by Likelihood objects that support specifying a prior distribution over the
        outputs. If it returns None, no prior is used and the model is trained with plain maximum likelihood.
"""
return None
@abstractmethod
def _distr_from_params(self, params: tuple) -> torch.distributions.Distribution:
"""
Returns a torch distribution built with the specified params
"""
pass
@abstractmethod
def _params_from_output(
self, model_output: torch.Tensor
) -> Union[tuple[torch.Tensor, ...], torch.Tensor]:
"""
Returns the distribution parameters, obtained from the raw model outputs
(e.g. applies softplus or sigmoids to get parameters in the expected domains).
"""
pass
    @abstractmethod
def sample(self, model_output: torch.Tensor) -> torch.Tensor:
"""
Samples a prediction from the probability distributions defined by the specific likelihood model
and the parameters given in `model_output`.
"""
pass
    def predict_likelihood_parameters(self, model_output: torch.Tensor) -> torch.Tensor:
"""
Returns the distribution parameters as a single Tensor, extracted from the raw model outputs.
"""
params = self._params_from_output(model_output)
if isinstance(params, torch.Tensor):
return params
else:
# interleave the predicted parameters to group them by input series component
num_samples, n_times, n_components, n_params = model_output.shape
return torch.stack(params, dim=3).reshape((
num_samples,
n_times,
n_components * n_params,
))
    @abstractmethod
def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
"""
Generates names for the parameters of the Likelihood.
"""
pass
def _likelihood_generate_components_names(
self, input_series: TimeSeries, parameter_names: list[str]
) -> list[str]:
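        # e.g. components ["temp", "hum"] with parameter_names ["mu", "sigma"]
        # -> ["temp_mu", "temp_sigma", "hum_mu", "hum_sigma"]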
return [
f"{tgt_name}_{param_n}"
for tgt_name in input_series.components
for param_n in parameter_names
]
@property
@abstractmethod
def num_parameters(self) -> int:
"""
Returns the number of parameters that define the probability distribution for one single
target value.
"""
pass
    @abstractmethod
def simplified_name(self) -> str:
"""Returns a simplified name, used to compare Likelihood and LikelihoodMixin_ instances"""
pass
def __eq__(self, other) -> bool:
"""
        Defines (in)equality between two likelihood objects, ignoring the attributes listed in
        `self.ignore_attrs_equality` and those inheriting from `torch.nn.Module`.
"""
if type(other) is type(self):
other_state = {
k: v
for k, v in other.__dict__.items()
if k not in self.ignore_attrs_equality and not isinstance(v, nn.Module)
}
self_state = {
k: v
for k, v in self.__dict__.items()
if k not in self.ignore_attrs_equality and not isinstance(v, nn.Module)
}
return other_state == self_state
else:
return False
def __repr__(self) -> str:
"""Return the class and parameters of the instance in a nice format"""
cls_name = self.__class__.__name__
        # only display the constructor parameters, as the user cannot change the other attributes
init_signature = inspect.signature(self.__class__.__init__)
params_string = ", ".join([
f"{str(v)}"
for _, v in init_signature.parameters.items()
if str(v) != "self"
])
return f"{cls_name}({params_string})"
class GaussianLikelihood(Likelihood):
def __init__(
self, prior_mu=None, prior_sigma=None, prior_strength=1.0, beta_nll=0.0
):
"""
Univariate Gaussian distribution.
https://en.wikipedia.org/wiki/Normal_distribution
Instead of the pure negative log likelihood (NLL) loss, the loss function used
        is the :math:`\\beta`-NLL loss [1]_, parameterized by ``beta_nll`` in [0, 1].
For ``beta_nll=0`` it is equivalent to NLL, however larger values of ``beta_nll`` can
mitigate issues with NLL causing effective under-sampling of poorly fit regions
during training. ``beta_nll=1`` provides the same gradient for the mean as the MSE loss.
- Univariate continuous distribution.
- Support: :math:`\\mathbb{R}`.
- Parameters: mean :math:`\\mu \\in \\mathbb{R}`, standard deviation :math:`\\sigma > 0`.
Parameters
----------
prior_mu
mean of the prior Gaussian distribution (default: None).
prior_sigma
standard deviation (or scale) of the prior Gaussian distribution (default: None)
prior_strength
strength of the loss regularisation induced by the prior
beta_nll
The parameter :math:`0 \\leq \\beta \\leq 1` of the :math:`\\beta`-NLL loss [1]_.
Default: 0. (equivalent to NLL)
References
----------
.. [1] Seitzer et al.,
"On the Pitfalls of Heteroscedastic Uncertainty Estimation with Probabilistic Neural Networks"
https://arxiv.org/abs/2203.09168
"""
self.prior_mu = prior_mu
self.prior_sigma = prior_sigma
self.beta_nll = beta_nll
_check_strict_positive(self.prior_sigma, "sigma")
self.nllloss = nn.GaussianNLLLoss(full=True, reduction="none")
self.softplus = nn.Softplus()
super().__init__(prior_strength)
def _nllloss(self, params_out, target, sample_weight):
means_out, sigmas_out = params_out
# Note: GaussianNLLLoss expects variance (and not stdev)
cont_var = sigmas_out.contiguous() ** 2
loss = self.nllloss(means_out.contiguous(), target.contiguous(), cont_var)
# apply Beta-NLL
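        # the per-element NLL is re-weighted by the (gradient-stopped) predicted variance raised to
        # the power beta_nll, which counteracts the down-weighting of poorly-fit, high-variance points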
if self.beta_nll > 0.0:
# Note: there is no mean reduction if beta_nll > 0, so we compute it here
loss = loss * (cont_var.detach() ** self.beta_nll)
if sample_weight is not None:
loss = loss * sample_weight
return loss.mean()
@property
def _prior_params(self):
return self.prior_mu, self.prior_sigma
def _distr_from_params(self, params):
mu, sigma = params
return _Normal(mu, sigma)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
mu, sigma = self._params_from_output(model_output)
return torch.normal(mu, sigma)
@property
def num_parameters(self) -> int:
return 2
def _params_from_output(self, model_output):
mu = model_output[:, :, :, 0]
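        # softplus maps the raw network output onto (0, +inf) so that sigma stays strictly positive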
sigma = self.softplus(model_output[:, :, :, 1])
return mu, sigma
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["mu", "sigma"])
    def simplified_name(self) -> str:
        return "gaussian"
class PoissonLikelihood(Likelihood):
def __init__(self, prior_lambda=None, prior_strength=1.0):
"""
Poisson distribution. Can typically be used to model event counts during time intervals, when the events
happen independently of the time since the last event.
https://en.wikipedia.org/wiki/Poisson_distribution
- Univariate discrete distribution
- Support: :math:`\\mathbb{N}_0` (natural numbers including 0).
- Parameter: rate :math:`\\lambda > 0`.
Parameters
----------
prior_lambda
rate :math:`\\lambda` of the prior Poisson distribution (default: None)
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_lambda = prior_lambda
_check_strict_positive(self.prior_lambda, "lambda")
self.nllloss = nn.PoissonNLLLoss(log_input=False, full=True, reduction="none")
self.softplus = nn.Softplus()
super().__init__(prior_strength)
def _nllloss(self, params_out, target, sample_weight):
lambda_out = params_out
loss = self.nllloss(lambda_out, target)
if sample_weight is not None:
loss = loss * sample_weight
return loss.mean()
@property
def _prior_params(self):
return (self.prior_lambda,)
def _distr_from_params(self, params):
lmbda = params[0]
return _Poisson(lmbda)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
model_lambda = self._params_from_output(model_output)
return torch.poisson(model_lambda)
@property
def num_parameters(self) -> int:
return 1
def _params_from_output(self, model_output):
lmbda = self.softplus(model_output.squeeze(dim=-1))
return lmbda
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["lambda"])
    def simplified_name(self) -> str:
        return "poisson"
class NegativeBinomialLikelihood(Likelihood):
def __init__(self):
"""
Negative Binomial distribution.
https://en.wikipedia.org/wiki/Negative_binomial_distribution
It does not support priors.
- Univariate discrete distribution.
- Support: :math:`\\mathbb{N}_0` (natural numbers including 0).
- Parameters: number of failures :math:`r > 0`, success probability :math:`p \\in (0, 1)`.
Behind the scenes the distribution is reparameterized so that the actual outputs of the
network are in terms of the mean :math:`\\mu` and shape :math:`\\alpha`.
"""
self.softplus = nn.Softplus()
super().__init__()
@property
def _prior_params(self):
return None
@staticmethod
def _get_r_and_p_from_mu_and_alpha(mu, alpha):
# See https://en.wikipedia.org/wiki/Negative_binomial_distribution for the different parametrizations
r = 1.0 / alpha
p = r / (mu + r)
return r, p
def _distr_from_params(self, params):
mu, alpha = params
r, p = NegativeBinomialLikelihood._get_r_and_p_from_mu_and_alpha(mu, alpha)
return _NegativeBinomial(r, p)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
        mu, alpha = self._params_from_output(model_output)
        r, p = NegativeBinomialLikelihood._get_r_and_p_from_mu_and_alpha(mu, alpha)
        distr = _NegativeBinomial(r, p)
        return distr.sample()
    def predict_likelihood_parameters(self, model_output: torch.Tensor) -> torch.Tensor:
"""Overwrite the parent since the parameters are extracted in two steps."""
mu, alpha = self._params_from_output(model_output)
r, p = NegativeBinomialLikelihood._get_r_and_p_from_mu_and_alpha(mu, alpha)
return torch.cat([r, p], dim=-1)
def _params_from_output(self, model_output):
mu = self.softplus(model_output[:, :, :, 0])
alpha = self.softplus(model_output[:, :, :, 1])
return mu, alpha
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["r", "p"])
    @property
    def num_parameters(self) -> int:
        return 2
    def simplified_name(self) -> str:
        return "negativebinomial"
class BernoulliLikelihood(Likelihood):
def __init__(self, prior_p=None, prior_strength=1.0):
"""
Bernoulli distribution.
https://en.wikipedia.org/wiki/Bernoulli_distribution
- Univariate discrete distribution.
- Support: :math:`\\{0, 1\\}`.
- Parameter: probability :math:`p \\in (0, 1)`.
Parameters
----------
prior_p
probability :math:`p` of the prior Bernoulli distribution (default: None)
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_p = prior_p
_check_in_open_0_1_intvl(self.prior_p, "p")
self.sigmoid = nn.Sigmoid()
super().__init__(prior_strength)
@property
def _prior_params(self):
return (self.prior_p,)
def _distr_from_params(self, params):
p = params[0]
return _Bernoulli(p)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
model_p = self._params_from_output(model_output)
return torch.bernoulli(model_p)
@property
def num_parameters(self) -> int:
return 1
def _params_from_output(self, model_output: torch.Tensor):
p = self.sigmoid(model_output.squeeze(dim=-1))
return p
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["p"])
    def simplified_name(self) -> str:
        return "bernoulli"
class BetaLikelihood(Likelihood):
def __init__(self, prior_alpha=None, prior_beta=None, prior_strength=1.0):
"""
Beta distribution.
https://en.wikipedia.org/wiki/Beta_distribution
- Univariate continuous distribution.
- Support: open interval :math:`(0,1)`
- Parameters: shape parameters :math:`\\alpha > 0` and :math:`\\beta > 0`.
Parameters
----------
prior_alpha
shape parameter :math:`\\alpha` of the Beta distribution, strictly positive (default: None)
prior_beta
            shape parameter :math:`\\beta` of the Beta distribution, strictly positive (default: None)
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_alpha = prior_alpha
self.prior_beta = prior_beta
_check_strict_positive(self.prior_alpha, "alpha")
_check_strict_positive(self.prior_beta, "beta")
self.softplus = nn.Softplus()
super().__init__(prior_strength)
@property
def _prior_params(self):
return self.prior_alpha, self.prior_beta
def _distr_from_params(self, params):
alpha, beta = params
return _Beta(alpha, beta)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
alpha, beta = self._params_from_output(model_output)
distr = _Beta(alpha, beta)
return distr.sample()
@property
def num_parameters(self) -> int:
return 2
def _params_from_output(self, model_output):
alpha = self.softplus(model_output[:, :, :, 0])
beta = self.softplus(model_output[:, :, :, 1])
return alpha, beta
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(
            input_series, ["alpha", "beta"]
        )
    def simplified_name(self) -> str:
        return "beta"
class CauchyLikelihood(Likelihood):
def __init__(self, prior_xzero=None, prior_gamma=None, prior_strength=1.0):
"""
Cauchy Distribution.
https://en.wikipedia.org/wiki/Cauchy_distribution
- Univariate continuous distribution.
- Support: :math:`\\mathbb{R}`.
- Parameters: location :math:`x_0 \\in \\mathbb{R}`, scale :math:`\\gamma > 0`.
Due to its fat tails, this distribution is typically harder to estimate,
and your mileage may vary. Also be aware that it typically
requires a large value for `num_samples` for sampling predictions.
Parameters
----------
prior_xzero
location parameter :math:`x_0` of the Cauchy distribution (default: None)
prior_gamma
scale parameter :math:`\\gamma` of the Cauchy distribution, strictly positive (default: None)
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_xzero = prior_xzero
self.prior_gamma = prior_gamma
_check_strict_positive(self.prior_gamma, "gamma")
self.softplus = nn.Softplus()
super().__init__(prior_strength)
@property
def _prior_params(self):
return self.prior_xzero, self.prior_gamma
def _distr_from_params(self, params):
xzero, gamma = params
return _Cauchy(xzero, gamma)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
xzero, gamma = self._params_from_output(model_output)
distr = _Cauchy(xzero, gamma)
return distr.sample()
@property
def num_parameters(self) -> int:
return 2
def _params_from_output(self, model_output):
xzero = model_output[:, :, :, 0]
gamma = self.softplus(model_output[:, :, :, 1])
# We need this hack as sometimes the output of the softplus is 0 in practice for Cauchy...
gamma[gamma < MIN_CAUCHY_GAMMA_SAMPLING] = MIN_CAUCHY_GAMMA_SAMPLING
return xzero, gamma
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(
            input_series, ["xzero", "gamma"]
        )
    def simplified_name(self) -> str:
        return "cauchy"
class ContinuousBernoulliLikelihood(Likelihood):
def __init__(self, prior_lambda=None, prior_strength=1.0):
"""
Continuous Bernoulli distribution.
https://en.wikipedia.org/wiki/Continuous_Bernoulli_distribution
- Univariate continuous distribution.
- Support: open interval :math:`(0, 1)`.
- Parameter: shape :math:`\\lambda \\in (0,1)`
Parameters
----------
prior_lambda
shape :math:`\\lambda` of the prior Continuous Bernoulli distribution (default: None)
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_lambda = prior_lambda
_check_in_open_0_1_intvl(self.prior_lambda, "lambda")
self.sigmoid = nn.Sigmoid()
super().__init__(prior_strength)
@property
def _prior_params(self):
return (self.prior_lambda,)
def _distr_from_params(self, params):
lmbda = params[0]
return _ContinuousBernoulli(lmbda)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
model_lmbda = self._params_from_output(model_output)
distr = _ContinuousBernoulli(model_lmbda)
return distr.sample()
@property
def num_parameters(self) -> int:
return 1
def _params_from_output(self, model_output: torch.Tensor):
lmbda = self.sigmoid(model_output.squeeze(dim=-1))
return lmbda
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["lambda"])
    def simplified_name(self) -> str:
        return "continuousbernoulli"
class DirichletLikelihood(Likelihood):
def __init__(self, prior_alphas=None, prior_strength=1.0):
"""
Dirichlet distribution.
https://en.wikipedia.org/wiki/Dirichlet_distribution
- Multivariate continuous distribution, modeling all components of a time series jointly.
- Support: The :math:`K`-dimensional simplex for series of dimension :math:`K`, i.e.,
:math:`x_1, ..., x_K \\text{ with } x_i \\in (0,1),\\; \\sum_i^K{x_i}=1`.
- Parameter: concentrations :math:`\\alpha_1, ..., \\alpha_K` with :math:`\\alpha_i > 0`.
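        Since this likelihood is multivariate, the prior concentrations have to be provided as an
        array-like with one value per component (e.g., ``prior_alphas=[1.0, 1.0, 1.0]`` for a
        3-component series; illustrative values).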
Parameters
----------
prior_alphas
            concentration parameters :math:`\\alpha` of the prior Dirichlet distribution (default: None).
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_alphas = prior_alphas
_check_strict_positive(self.prior_alphas)
self.softmax = nn.Softmax(dim=2)
super().__init__(prior_strength)
@property
def _prior_params(self):
return (self.prior_alphas,)
def _distr_from_params(self, params: tuple):
alphas = params[0]
return _Dirichlet(alphas)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
        alphas = self._params_from_output(model_output)
        distr = _Dirichlet(alphas)
        return distr.sample()
    def predict_likelihood_parameters(self, model_output: torch.Tensor) -> torch.Tensor:
alphas = self._params_from_output(model_output)
return alphas
@property
def num_parameters(self) -> int:
return 1 # 1 parameter per component
def _params_from_output(self, model_output):
alphas = self.softmax(
model_output.squeeze(dim=-1)
) # take softmax over components
return alphas
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        # one alpha per component
        return self._likelihood_generate_components_names(input_series, ["alpha"])
    def simplified_name(self) -> str:
        return "dirichlet"
class ExponentialLikelihood(Likelihood):
def __init__(self, prior_lambda=None, prior_strength=1.0):
"""
Exponential distribution.
https://en.wikipedia.org/wiki/Exponential_distribution
- Univariate continuous distribution.
- Support: :math:`\\mathbb{R}_{>0}`.
- Parameter: rate :math:`\\lambda > 0`.
Parameters
----------
prior_lambda
rate :math:`\\lambda` of the prior exponential distribution (default: None).
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_lambda = prior_lambda
_check_strict_positive(self.prior_lambda, "lambda")
self.softplus = nn.Softplus()
super().__init__(prior_strength)
@property
def _prior_params(self):
return (self.prior_lambda,)
def _distr_from_params(self, params: tuple):
lmbda = params[0]
return _Exponential(lmbda)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
lmbda = self._params_from_output(model_output)
distr = _Exponential(lmbda)
return distr.sample()
@property
def num_parameters(self) -> int:
return 1
def _params_from_output(self, model_output: torch.Tensor):
lmbda = self.softplus(model_output.squeeze(dim=-1))
return lmbda
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["lambda"])
    def simplified_name(self) -> str:
        return "exponential"
class GammaLikelihood(Likelihood):
def __init__(self, prior_alpha=None, prior_beta=None, prior_strength=1.0):
"""
Gamma distribution.
https://en.wikipedia.org/wiki/Gamma_distribution
- Univariate continuous distribution
- Support: :math:`\\mathbb{R}_{>0}`.
- Parameters: shape :math:`\\alpha > 0` and rate :math:`\\beta > 0`.
Parameters
----------
prior_alpha
shape :math:`\\alpha` of the prior gamma distribution (default: None).
prior_beta
rate :math:`\\beta` of the prior gamma distribution (default: None).
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_alpha = prior_alpha
self.prior_beta = prior_beta
_check_strict_positive(self.prior_alpha, "alpha")
_check_strict_positive(self.prior_beta, "beta")
self.softplus = nn.Softplus()
super().__init__(prior_strength)
@property
def _prior_params(self):
return self.prior_alpha, self.prior_beta
def _distr_from_params(self, params: tuple):
alpha, beta = params
return _Gamma(alpha, beta)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
alpha, beta = self._params_from_output(model_output)
distr = _Gamma(alpha, beta)
return distr.sample()
@property
def num_parameters(self) -> int:
return 2
def _params_from_output(self, model_output: torch.Tensor):
alpha = self.softplus(model_output[:, :, :, 0])
beta = self.softplus(model_output[:, :, :, 1])
return alpha, beta
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(
            input_series, ["alpha", "beta"]
        )
    def simplified_name(self) -> str:
        return "gamma"
class GeometricLikelihood(Likelihood):
def __init__(self, prior_p=None, prior_strength=1.0):
"""
Geometric distribution.
https://en.wikipedia.org/wiki/Geometric_distribution
- Univariate discrete distribution
- Support: :math:`\\mathbb{N}_0` (natural numbers including 0).
- Parameter: success probability :math:`p \\in (0, 1)`.
Parameters
----------
prior_p
success probability :math:`p` of the prior geometric distribution (default: None)
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_p = prior_p
_check_in_open_0_1_intvl(self.prior_p, "p")
self.sigmoid = nn.Sigmoid()
super().__init__(prior_strength)
@property
def _prior_params(self):
return (self.prior_p,)
def _distr_from_params(self, params: tuple):
p = params[0]
return _Geometric(p)
[docs] def sample(self, model_output) -> torch.Tensor:
p = self._params_from_output(model_output)
distr = _Geometric(p)
return distr.sample()
@property
def num_parameters(self) -> int:
return 1
def _params_from_output(self, model_output: torch.Tensor):
p = self.sigmoid(model_output.squeeze(dim=-1))
return p
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["p"])
    def simplified_name(self) -> str:
        return "geometric"
class GumbelLikelihood(Likelihood):
def __init__(self, prior_mu=None, prior_beta=None, prior_strength=1.0):
"""
Gumbel distribution.
https://en.wikipedia.org/wiki/Gumbel_distribution
- Univariate continuous distribution
- Support: :math:`\\mathbb{R}`.
- Parameters: location :math:`\\mu \\in \\mathbb{R}` and scale :math:`\\beta > 0`.
Parameters
----------
prior_mu
location :math:`\\mu` of the prior Gumbel distribution (default: None).
prior_beta
scale :math:`\\beta` of the prior Gumbel distribution (default: None).
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_mu = prior_mu
self.prior_beta = prior_beta
_check_strict_positive(self.prior_beta)
self.softplus = nn.Softplus()
super().__init__(prior_strength)
@property
def _prior_params(self):
return self.prior_mu, self.prior_beta
def _distr_from_params(self, params: tuple):
mu, beta = params
return _Gumbel(mu, beta)
[docs] def sample(self, model_output) -> torch.Tensor:
mu, beta = self._params_from_output(model_output)
distr = _Gumbel(mu, beta)
return distr.sample()
@property
def num_parameters(self) -> int:
return 2
def _params_from_output(self, model_output: torch.Tensor):
mu = model_output[:, :, :, 0]
beta = self.softplus(model_output[:, :, :, 1])
return mu, beta
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["mu", "beta"])
    def simplified_name(self) -> str:
        return "gumbel"
class HalfNormalLikelihood(Likelihood):
def __init__(self, prior_sigma=None, prior_strength=1.0):
"""
Half-normal distribution.
https://en.wikipedia.org/wiki/Half-normal_distribution
- Univariate continuous distribution.
- Support: :math:`\\mathbb{R}_{>0}`.
        - Parameter: scale (standard deviation) :math:`\\sigma > 0`.
Parameters
----------
prior_sigma
standard deviation :math:`\\sigma` of the prior half-normal distribution (default: None).
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_sigma = prior_sigma
_check_strict_positive(self.prior_sigma, "sigma")
self.softplus = nn.Softplus()
super().__init__(prior_strength)
@property
def _prior_params(self):
return (self.prior_sigma,)
def _distr_from_params(self, params: tuple):
sigma = params[0]
return _HalfNormal(sigma)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
sigma = self._params_from_output(model_output)
distr = _HalfNormal(sigma)
return distr.sample()
@property
def num_parameters(self) -> int:
return 1
def _params_from_output(self, model_output: torch.Tensor):
sigma = self.softplus(model_output.squeeze(dim=-1))
return sigma
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["sigma"])
    def simplified_name(self) -> str:
        return "halfnormal"
class LaplaceLikelihood(Likelihood):
def __init__(self, prior_mu=None, prior_b=None, prior_strength=1.0):
"""
Laplace distribution.
https://en.wikipedia.org/wiki/Laplace_distribution
- Univariate continuous distribution
- Support: :math:`\\mathbb{R}`.
- Parameters: location :math:`\\mu \\in \\mathbb{R}` and scale :math:`b > 0`.
Parameters
----------
prior_mu
location :math:`\\mu` of the prior Laplace distribution (default: None).
prior_b
scale :math:`b` of the prior Laplace distribution (default: None).
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_mu = prior_mu
self.prior_b = prior_b
_check_strict_positive(self.prior_b)
self.softplus = nn.Softplus()
super().__init__(prior_strength)
@property
def _prior_params(self):
return self.prior_mu, self.prior_b
def _distr_from_params(self, params: tuple):
mu, b = params
return _Laplace(mu, b)
[docs] def sample(self, model_output) -> torch.Tensor:
mu, b = self._params_from_output(model_output)
distr = _Laplace(mu, b)
return distr.sample()
@property
def num_parameters(self) -> int:
return 2
def _params_from_output(self, model_output: torch.Tensor):
mu = model_output[:, :, :, 0]
b = self.softplus(model_output[:, :, :, 1])
return mu, b
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["mu", "b"])
    def simplified_name(self) -> str:
        return "laplace"
class LogNormalLikelihood(Likelihood):
def __init__(self, prior_mu=None, prior_sigma=None, prior_strength=1.0):
"""
Log-normal distribution.
https://en.wikipedia.org/wiki/Log-normal_distribution
- Univariate continuous distribution.
- Support: :math:`\\mathbb{R}_{>0}`.
- Parameters: :math:`\\mu \\in \\mathbb{R}` and :math:`\\sigma > 0`.
Parameters
----------
prior_mu
parameter :math:`\\mu` of the prior log-normal distribution (default: None).
prior_sigma
parameter :math:`\\sigma` of the prior log-normal distribution (default: None)
prior_strength
strength of the loss regularisation induced by the prior
"""
self.prior_mu = prior_mu
self.prior_sigma = prior_sigma
_check_strict_positive(self.prior_sigma, "sigma")
self.softplus = nn.Softplus()
super().__init__(prior_strength)
@property
def _prior_params(self):
return self.prior_mu, self.prior_sigma
def _distr_from_params(self, params):
mu, sigma = params
return _LogNormal(mu, sigma)
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
mu, sigma = self._params_from_output(model_output)
distr = _LogNormal(mu, sigma)
return distr.sample()
@property
def num_parameters(self) -> int:
return 2
def _params_from_output(self, model_output):
mu = model_output[:, :, :, 0]
sigma = self.softplus(model_output[:, :, :, 1])
return mu, sigma
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["mu", "sigma"])
    def simplified_name(self) -> str:
        return "lognormal"
class WeibullLikelihood(Likelihood):
def __init__(self, prior_strength=1.0):
"""
Weibull distribution.
https://en.wikipedia.org/wiki/Weibull_distribution
- Univariate continuous distribution
- Support: :math:`\\mathbb{R}_{>0}`.
- Parameters: scale :math:`\\lambda > 0` and concentration :math:`k > 0`.
It does not support priors.
Parameters
----------
prior_strength
strength of the loss regularisation induced by the prior
"""
self.softplus = nn.Softplus()
super().__init__(prior_strength)
@property
def _prior_params(self):
return None
def _distr_from_params(self, params: tuple):
        lmbda, k = params
        return _Weibull(lmbda, k)
[docs] def sample(self, model_output) -> torch.Tensor:
lmbda, k = self._params_from_output(model_output)
distr = _Weibull(lmbda, k)
return distr.sample()
@property
def num_parameters(self) -> int:
return 2
def _params_from_output(self, model_output: torch.Tensor):
lmbda = self.softplus(model_output[:, :, :, 0])
k = self.softplus(model_output[:, :, :, 1])
return lmbda, k
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        return self._likelihood_generate_components_names(input_series, ["lambda", "k"])
    def simplified_name(self) -> str:
        return "weibull"
class QuantileRegression(Likelihood):
def __init__(self, quantiles: Optional[list[float]] = None):
"""
The "likelihood" corresponding to quantile regression.
It uses the Quantile Loss Metric for custom quantiles centered around q=0.5.
        This class can be used like any other Likelihood object even though it does not
        represent the likelihood of a well-defined distribution.
Parameters
----------
quantiles
list of quantiles
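            (default: a symmetric set of 17 quantiles around the median). For instance,
            ``QuantileRegression(quantiles=[0.05, 0.5, 0.95])`` fits the 5th, 50th and 95th
            percentiles; the median quantile 0.5 must always be included.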
"""
super().__init__()
if quantiles is None:
self.quantiles = [
0.01,
0.05,
0.1,
0.15,
0.2,
0.25,
0.3,
0.4,
0.5,
0.6,
0.7,
0.75,
0.8,
0.85,
0.9,
0.95,
0.99,
]
else:
self.quantiles = sorted(quantiles)
_check_quantiles(self.quantiles)
self._median_idx = self.quantiles.index(0.5)
self.first = True
self.quantiles_tensor = None
# overwrite the attributes of Likelihood parent class
self.ignore_attrs_equality = ["first", "quantiles_tensor"]
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
"""
        Sample a probability uniformly in [0, 1] (for each batch example) and return the linear interpolation between the fitted
quantiles closest to the sampled value.
model_output is of shape (batch_size, n_timesteps, n_components, n_quantiles)
"""
device = model_output.device
num_samples, n_timesteps, n_components, n_quantiles = model_output.shape
# obtain samples
probs = torch.rand(
size=(
num_samples,
n_timesteps,
n_components,
1,
)
).to(device)
# add dummy dim
probas = probs.unsqueeze(-2)
# tile and transpose
p = torch.tile(probas, (1, 1, 1, n_quantiles, 1)).transpose(4, 3)
# prepare quantiles
tquantiles = torch.tensor(self.quantiles).reshape((1, 1, 1, -1)).to(device)
# calculate index of biggest quantile smaller than the sampled value
left_idx = torch.sum(p > tquantiles, dim=-1)
# obtain index of smallest quantile bigger than sampled value
right_idx = left_idx + 1
# repeat the model output on the edges
repeat_count = [1] * n_quantiles
repeat_count[0] = 2
repeat_count[-1] = 2
repeat_count = torch.tensor(repeat_count).to(device)
shifted_output = torch.repeat_interleave(model_output, repeat_count, dim=-1)
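        # duplicating the outputs of the lowest/highest quantile clamps sampled probabilities that fall
        # outside [q_min, q_max] to the corresponding edge quantile value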
# obtain model output values corresponding to the quantiles left and right of the sampled value
left_value = torch.gather(shifted_output, index=left_idx, dim=-1)
right_value = torch.gather(shifted_output, index=right_idx, dim=-1)
# add 0 and 1 to quantiles
ext_quantiles = [0.0] + self.quantiles + [1.0]
expanded_q = torch.tile(torch.tensor(ext_quantiles), left_idx.shape).to(device)
# calculate closest quantiles to the sampled value
left_q = torch.gather(expanded_q, index=left_idx, dim=-1)
right_q = torch.gather(expanded_q, index=right_idx, dim=-1)
# linear interpolation
weights = (probs - left_q) / (right_q - left_q)
inter = left_value + weights * (right_value - left_value)
return inter.squeeze(-1)
    def predict_likelihood_parameters(self, model_output: torch.Tensor) -> torch.Tensor:
        """Overwrite the parent method since QuantileRegression is not a Likelihood per se and
        parameters must be extracted differently."""
num_samples, n_timesteps, n_components, n_quantiles = model_output.shape
params = model_output.reshape(
num_samples, n_timesteps, n_components * n_quantiles
)
return params
@property
def num_parameters(self) -> int:
return len(self.quantiles)
    def compute_loss(
self,
model_output: torch.Tensor,
target: torch.Tensor,
sample_weight: torch.Tensor,
):
"""
        Re-defines a custom loss (the quantile/pinball loss, which is not a likelihood loss) instead of the one from the parent Likelihood class.
Parameters
----------
model_output
must be of shape (batch_size, n_timesteps, n_target_variables, n_quantiles)
target
must be of shape (n_samples, n_timesteps, n_target_variables)
sample_weight
must be of shape (n_samples, n_timesteps, n_target_variables)
"""
dim_q = 3
device = model_output.device
# test if torch model forward produces correct output and store quantiles tensor
if self.first:
raise_if_not(
len(model_output.shape) == 4
and len(target.shape) == 3
and model_output.shape[:2] == target.shape[:2],
"mismatch between predicted and target shape",
)
raise_if_not(
model_output.shape[dim_q] == len(self.quantiles),
"mismatch between number of predicted quantiles and target quantiles",
)
self.quantiles_tensor = torch.tensor(self.quantiles).to(device)
self.first = False
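        # pinball (quantile) loss: for each quantile q, max(q * e, (q - 1) * e) with e = target - prediction,
        # summed over the quantile dimension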
errors = target.unsqueeze(-1) - model_output
losses = torch.max(
(self.quantiles_tensor - 1) * errors, self.quantiles_tensor * errors
).sum(dim=dim_q)
if sample_weight is not None:
losses = losses * sample_weight
return losses.mean()
def _distr_from_params(self, params: tuple) -> None:
# This should not be called in this class (we are abusing Likelihood)
return None
def _params_from_output(self, model_output: torch.Tensor) -> None:
# This should not be called in this class (we are abusing Likelihood)
return None
    def likelihood_components_names(self, input_series: TimeSeries) -> list[str]:
        """Each component has its own quantiles"""
return likelihood_component_names(
components=input_series.components,
parameter_names=quantile_names(self.quantiles),
)
    def simplified_name(self) -> str:
return "quantile"
""" TODO
To make it work, we'll have to change our models so they optionally accept an absolute
number of parameters, instead of num_parameters per component.
from torch.distributions import MultivariateNormal as _MultivariateNormal
class MultivariateNormal(Likelihood):
def __init__(
self, dim: int, prior_mu=None, prior_covmat=None, prior_strength=1.0
):
self.dim = dim
self.prior_mu = prior_mu
self.prior_covmat = prior_covmat
if self.prior_mu is not None:
raise_if_not(
len(self.prior_mu) == self.dim,
"The provided prior_mu must have a size matching the "
"provided dimension.",
)
if self.prior_covmat is not None:
raise_if_not(
                self.prior_covmat.shape == (self.dim, self.dim),
                "The provided prior on the covariance "
                "matrix must have size (dim, dim).",
)
_check_strict_positive(self.prior_covmat.flatten(), "covariance matrix")
self.softplus = nn.Softplus()
super().__init__(prior_strength)
@property
def _prior_params(self):
return self.prior_mu, self.prior_covmat
def _distr_from_params(self, params: Tuple):
mu, covmat = params
return _MultivariateNormal(mu, covmat)
def sample(self, model_output: torch.Tensor):
mu, covmat = self._params_from_output(model_output)
distr = _MultivariateNormal(mu, covmat)
return distr.sample()
@property
def num_parameters(self) -> int:
return int(self.dim + (self.dim ** 2 - self.dim) / 2)
def _params_from_output(self, model_output: torch.Tensor):
device = model_output.device
mu = model_output[:, :, : self.dim]
covmat_coefs = self.softplus(model_output[:, :, self.dim :])
print("model output: {}".format(model_output.shape))
# build covariance matrix
covmat = torch.zeros(
(model_output.shape[0], model_output.shape[1], self.dim, self.dim)
).to(device)
tril_indices = torch.tril_indices(
row=self.dim, col=self.dim, offset=1, device=device
)
covmat[tril_indices[0], tril_indices[1]] = covmat_coefs
covmat[tril_indices[1], tril_indices[0]] = covmat_coefs
covmat[range(self.dim), range(self.dim)] = 1.0
"""