# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

from typing import Dict, List, Tuple

import mxnet as mx

from gluonts.core.component import validated
from gluonts.mx import Tensor

from .bijection import Bijection, InverseBijection
from .bijection_output import BijectionOutput
from .distribution import getF, softplus


class BoxCoxTransform(Bijection):
r"""
Implements Box-Cox transformation of a uni-variate random variable.
The Box-Cox transformation of an observation :math:`z` is given by:
.. math::
BoxCox(z; \lambda_1, \lambda_2) = \begin{cases}
((z + \lambda_2)^{\lambda_1} - 1) / \lambda_1, \quad & \text{if }
\lambda_1 \neq 0, \\
\log (z + \lambda_2), \quad & \text{otherwise.}
\end{cases}
Here, :math:`\lambda_1` and :math:`\lambda_2` are learnable parameters.
Note that the domain of the transformation is not restricted.
For numerical stability, instead of checking :math:`\lambda_1` is exactly
zero, we use the condition
.. math::
|\lambda_1| < tol\_lambda\_1
for a pre-specified tolerance `tol_lambda_1`.
Inverse of the Box-Cox Transform is given by
.. math::
BoxCox^{-1}(y; \lambda_1, \lambda_2) = \begin{cases}
(y \lambda_1 + 1)^{(1/\lambda_1)} - \lambda_2, \quad & \text{if }
\lambda_1 \neq 0, \\
\exp (y) - \lambda_2, \quad & \text{otherwise.}
\end{cases}

    **Notes on numerical stability:**

    1. For the forward transformation, :math:`\lambda_2` must always be
       chosen such that

       .. math::

           z + \lambda_2 > 0.

       To achieve this, one needs to know a priori a lower bound on the
       observations. This bound is set in `BoxCoxTransformOutput`, since
       :math:`\lambda_2` is learnable.

    2. Similarly, for the inverse transformation to work reliably, a
       sufficient condition is

       .. math::

           y \lambda_1 + 1 \geq 0,

       where :math:`y` is the input to the inverse transformation. This
       cannot always be guaranteed, especially when :math:`y` is a sample
       from a transformed distribution. Hence we always truncate
       :math:`y \lambda_1 + 1` at zero.

       An example showing why this can happen in our case: consider
       transforming observations from the unit interval (0, 1) with
       parameters

       .. math::

           \begin{align}
               \lambda_1 = &\ 1.1, \\
               \lambda_2 = &\ 0.
           \end{align}

       Then the range of the transformation is (-0.9090, 0.0). If a
       Gaussian is fit to the transformed observations and a sample is
       drawn from it, the sample is likely to fall outside this range,
       e.g., when the mean is close to -0.9. The subsequent inverse
       transformation of such a sample is then no longer a real number:

       >>> y = -0.91
       >>> lambda_1 = 1.1
       >>> lambda_2 = 0.0
       >>> (y * lambda_1 + 1) ** (1 / lambda_1) - lambda_2
       (-0.0017979146510711471+0.0005279153735965289j)
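
       Truncating :math:`y \lambda_1 + 1` at zero, as `f_inv` below does,
       keeps the inverse real-valued; with these (illustrative) parameter
       values the truncated base is exactly zero:

       >>> max(y * lambda_1 + 1, 0.0) ** (1 / lambda_1) - lambda_2
       0.0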

    Parameters
    ----------
    lambda_1
        Tensor of :math:`\lambda_1` values.
    lambda_2
        Tensor of :math:`\lambda_2` values.
    tol_lambda_1
        For numerical stability, treat `lambda_1` as zero if its absolute
        value is smaller than `tol_lambda_1`.
    F
        MXNet function namespace (`mx.nd` or `mx.sym`); if not given, it is
        inferred from `lambda_1`.
    """
arg_names = ["box_cox.lambda_1", "box_cox.lambda_2"]

    @validated()
def __init__(
self,
lambda_1: Tensor,
lambda_2: Tensor,
tol_lambda_1: float = 1e-2,
F=None,
) -> None:
self.lambda_1 = lambda_1
self.lambda_2 = lambda_2
self.tol_lambda_1 = tol_lambda_1
self.F = F if F else getF(lambda_1)
        # mx.nd names the elementwise power operator `power`, while mx.sym
        # names it `pow`; pick whichever the active namespace provides.
        self._power = self.F.power if self.F == mx.nd else self.F.pow

    @property
def args(self) -> List:
r"""
List: current values of the parameters
"""
return [self.lambda_1, self.lambda_2]

    @property
def event_dim(self) -> int:
return 0

    @property
def sign(self) -> Tensor:
return 1.0

    def f(self, z: Tensor) -> Tensor:
r"""
Forward transformation of observations `z`
Parameters
----------
z
observations
Returns
-------
Tensor
Transformed observations
"""
F = self.F
lambda_1 = self.lambda_1
lambda_2 = self.lambda_2
tol_lambda_1 = self.tol_lambda_1
_power = self._power
return F.where(
            condition=(F.abs(lambda_1) >= tol_lambda_1).broadcast_like(z),
x=(_power(z + lambda_2, lambda_1) - 1.0) / lambda_1,
y=F.log(z + lambda_2),
name="Box_Cox_trans",
)

    def f_inv(self, y: Tensor) -> Tensor:
r"""
Inverse of the Box-Cox Transform.
Parameters
----------
y
Transformed observations
Returns
-------
Tensor
Observations
"""
F = self.F
lambda_1 = self.lambda_1
lambda_2 = self.lambda_2
tol_lambda_1 = self.tol_lambda_1
_power = self._power
# For numerical stability we truncate :math:`y * \lambda_1 + 1.0` at
# zero.
base = F.relu(y * lambda_1 + 1.0)
return F.where(
            condition=(F.abs(lambda_1) >= tol_lambda_1).broadcast_like(y),
x=_power(base, 1.0 / lambda_1) - lambda_2,
y=F.exp(y) - lambda_2,
name="Box_Cox_inverse_trans",
)

    def log_abs_det_jac(self, z: Tensor, y: Tensor = None) -> Tensor:
r"""
Logarithm of the absolute value of the Jacobian determinant
corresponding to the Box-Cox Transform is given by.
.. math::
\log \frac{d}{dz} BoxCox(z; \lambda_1, \lambda_2) = \begin{cases}
\log (z + \lambda_2) (\lambda_1 - 1), \quad & \text{if } \lambda_1 \neq 0, \\
-\log (z + \lambda_2), \quad & \text{otherwise.}
\end{cases}
Note that the derivative of the transformation is always non-negative.
Parameters
----------
z
observations
y
not used
Returns
-------
Tensor
""" # noqa: E501
F = self.F
lambda_1 = self.lambda_1
lambda_2 = self.lambda_2
tol_lambda_1 = self.tol_lambda_1
return F.where(
            condition=F.abs(lambda_1) >= tol_lambda_1,
x=F.log(z + lambda_2) * (lambda_1 - 1.0),
y=-F.log(z + lambda_2),
name="Box_Cox_trans_log_det_jac",
)


class BoxCoxTransformOutput(BijectionOutput):
bij_cls: type = BoxCoxTransform
args_dim: Dict[str, int] = dict(zip(BoxCoxTransform.arg_names, [1, 1]))

    @validated()
def __init__(self, lb_obs: float = 0.0, fix_lambda_2: bool = True) -> None:
super().__init__()
self.lb_obs = lb_obs
self.fix_lambda_2 = fix_lambda_2

    def domain_map(self, F, *args: Tensor) -> Tuple[Tensor, ...]:
lambda_1, lambda_2 = args
if self.fix_lambda_2:
lambda_2 = -self.lb_obs * F.ones_like(lambda_2)
        else:
            # This ensures :math:`z + \lambda_2 > 0` whenever
            # :math:`z > lb_obs`, since softplus keeps
            # :math:`\lambda_2 > -lb_obs`.
            lambda_2 = softplus(F, lambda_2) - self.lb_obs * F.ones_like(
                lambda_2
            )
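            # Illustration with assumed values (not defaults): for
            # lb_obs = 0.5 and a raw lambda_2 of 0.0 this gives
            # softplus(0.0) - 0.5 = log(2) - 0.5 ~ 0.193 > -0.5, so any
            # observation z > 0.5 satisfies z + lambda_2 > 0.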
# we squeeze the output since event_shape is ()
return lambda_1.squeeze(axis=-1), lambda_2.squeeze(axis=-1)

    @property
def event_shape(self) -> Tuple:
return ()


class InverseBoxCoxTransform(InverseBijection):
"""
Implements the inverse of Box-Cox transformation as a bijection.
"""
arg_names = ["box_cox.lambda_1", "box_cox.lambda_2"]

    @validated()
def __init__(
self,
lambda_1: Tensor,
lambda_2: Tensor,
tol_lambda_1: float = 1e-2,
F=None,
) -> None:
super().__init__(BoxCoxTransform(lambda_1, lambda_2, tol_lambda_1, F))

    @property
def event_dim(self) -> int:
return 0


class InverseBoxCoxTransformOutput(BoxCoxTransformOutput):
bij_cls: type = InverseBoxCoxTransform
args_dim: Dict[str, int] = dict(
zip(InverseBoxCoxTransform.arg_names, [1, 1])
)

    @property
def event_shape(self) -> Tuple:
return ()
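

# A minimal usage sketch (illustrative, not part of the library): it builds
# a BoxCoxTransform with fixed, hand-picked parameters and checks that
# `f_inv` undoes `f`. The parameter values and shapes are assumptions made
# for this demo; in practice the lambdas come from a BoxCoxTransformOutput
# projection of network outputs.
if __name__ == "__main__":
    lambda_1 = mx.nd.array([0.5])
    lambda_2 = mx.nd.array([1.0])
    box_cox = BoxCoxTransform(lambda_1, lambda_2)

    z = mx.nd.array([2.0])
    # Forward: ((2.0 + 1.0) ** 0.5 - 1) / 0.5 = (sqrt(3) - 1) / 0.5 ~ 1.4641
    y = box_cox.f(z)
    # Inverse: (1.4641 * 0.5 + 1) ** (1 / 0.5) - 1.0 = 3.0 - 1.0 = 2.0
    z_rec = box_cox.f_inv(y)
    print(y.asnumpy(), z_rec.asnumpy())  # ~ [1.4641] [2.0]

    # InverseBoxCoxTransform wraps the same pair of maps with the roles of
    # `f` and `f_inv` swapped.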