# Source code for gluonts.mx.model.seq2seq._mq_dnn_estimator

# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

import logging
from distutils.util import strtobool
from typing import List, Optional

import mxnet as mx
import numpy as np

from gluonts.core.component import validated
from gluonts.dataset.stat import calculate_dataset_statistics
from gluonts.mx.block.decoder import ForkingMLPDecoder
from gluonts.mx.block.encoder import (
    HierarchicalCausalConv1DEncoder,
    RNNEncoder,
)
from gluonts.mx.block.quantile_output import (
    QuantileOutput,
    IncrementalQuantileOutput,
)
from gluonts.mx.distribution import DistributionOutput
from gluonts.mx.trainer import Trainer
from gluonts.transform import InstanceSampler

from ._forking_estimator import ForkingSeq2SeqEstimator


class MQCNNEstimator(ForkingSeq2SeqEstimator):
    """
    An :class:`MQDNNEstimator` with a Convolutional Neural Network (CNN) as an
    encoder and a multi-quantile MLP as a decoder. Implements the MQ-CNN
    Forecaster, proposed in [WTN+17]_.

    Note that MQCNN uses ValidationSplitSampler as its default
    train_sampler. If context_length is less than the length of the input
    time series, only one example will be used for training.

    Parameters
    ----------
    freq
        Time granularity of the data.
    prediction_length
        Length of the prediction, also known as 'horizon'.
    context_length
        Number of time units that condition the predictions, also known as
        'lookback period'. (default: 4 * prediction_length)
    use_past_feat_dynamic_real
        Whether to use the ``past_feat_dynamic_real`` field from the data.
        (default: False) Automatically inferred when creating the
        MQCNNEstimator with the `from_inputs` class method.
    use_feat_dynamic_real
        Whether to use the ``feat_dynamic_real`` field from the data.
        (default: False) Automatically inferred when creating the
        MQCNNEstimator with the `from_inputs` class method.
    use_feat_static_cat
        Whether to use the ``feat_static_cat`` field from the data.
        (default: False) Automatically inferred when creating the
        MQCNNEstimator with the `from_inputs` class method.
    cardinality
        Number of values of each categorical feature.
        This must be set if ``use_feat_static_cat == True`` (default: None)
        Automatically inferred when creating the MQCNNEstimator with the
        `from_inputs` class method.
    embedding_dimension
        Dimension of the embeddings for categorical features.
        (default: [min(50, (cat+1)//2) for cat in cardinality])
    add_time_feature
        Adds a set of time features. (default: True)
    add_age_feature
        Adds an age feature. (default: False)
        The age feature starts with a small value at the start of the time
        series and grows over time.
    enable_encoder_dynamic_feature
        Whether the encoder should also be provided with the dynamic features
        (``age``, ``time`` and ``feat_dynamic_real`` if enabled respectively).
        (default: True)
    enable_decoder_dynamic_feature
        Whether the decoder should also be provided with the dynamic features
        (``age``, ``time`` and ``feat_dynamic_real`` if enabled respectively).
        (default: True)
        It makes sense to disable this, if you don't have ``feat_dynamic_real``
        for the prediction range.
    seed
        Will set the specified int seed for numpy and MXNet if specified.
        (default: None)
    decoder_mlp_dim_seq
        The dimensionalities of the Multi Layer Perceptron layers of the
        decoder. Must be non-empty: the last entry is the decoder's final
        dimension, the preceding ones are its hidden layers. (default: [30])
    channels_seq
        The number of channels (i.e. filters or convolutions) for each layer of
        the HierarchicalCausalConv1DEncoder. More channels usually correspond
        to better performance and larger network size. (default: [30, 30, 30])
    dilation_seq
        The dilation of the convolutions in each layer of the
        HierarchicalCausalConv1DEncoder. Greater numbers correspond to a
        greater receptive field of the network, which is usually better with
        longer context_length. (Same length as channels_seq)
        (default: [1, 3, 9])
    kernel_size_seq
        The kernel sizes (i.e. window size) of the convolutions in each layer
        of the HierarchicalCausalConv1DEncoder.
        (Same length as channels_seq) (default: [7, 3, 3])
    use_residual
        Whether the hierarchical encoder should additionally pass the unaltered
        past target to the decoder. (default: True)
    quantiles
        The list of quantiles that will be optimized for, and predicted by, the
        model. Optimizing for more quantiles than are of direct interest to you
        can result in improved performance due to a regularizing effect.
        (default: [0.025, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.975])
    distr_output
        DistributionOutput to use. Only one of `quantiles` and
        `distr_output` can be set. (default: None)
    trainer
        The GluonTS trainer to use for training. (default: Trainer())
    scaling
        Whether to automatically scale the target values. (default: False if
        quantile_output is used, True otherwise)
    scaling_decoder_dynamic_feature
        Whether to automatically scale the dynamic features for the decoder.
        (default: False)
    num_forking
        Decides how much forking to do in the decoder. 1 reduces to seq2seq and
        enc_len reduces to MQ-CNN.
    max_ts_len
        The length of the longest time series in the dataset, to be used
        in bounding context_length. Automatically inferred when creating the
        MQCNNEstimator with the `from_inputs` class method.
    is_iqf
        Determines whether to use IQF (``IncrementalQuantileOutput``) or QF
        (``QuantileOutput``). (default: True).
    batch_size
        The size of the batches to be used for training and prediction.
    train_sampler
        Controls the sampling of windows during training.
    validation_sampler
        Controls the sampling of windows during validation.
    """

    @validated()
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        use_past_feat_dynamic_real: bool = False,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        add_time_feature: bool = True,
        add_age_feature: bool = False,
        enable_encoder_dynamic_feature: bool = True,
        enable_decoder_dynamic_feature: bool = True,
        seed: Optional[int] = None,
        decoder_mlp_dim_seq: Optional[List[int]] = None,
        channels_seq: Optional[List[int]] = None,
        dilation_seq: Optional[List[int]] = None,
        kernel_size_seq: Optional[List[int]] = None,
        use_residual: bool = True,
        quantiles: Optional[List[float]] = None,
        distr_output: Optional[DistributionOutput] = None,
        trainer: Trainer = Trainer(),
        scaling: Optional[bool] = None,
        scaling_decoder_dynamic_feature: bool = False,
        num_forking: Optional[int] = None,
        max_ts_len: Optional[int] = None,
        is_iqf: bool = True,
        batch_size: int = 32,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
    ) -> None:
        assert (distr_output is None) or (
            quantiles is None
        ), "Only one of `quantiles` and `distr_output` can be set"
        assert (
            prediction_length > 0
        ), f"Invalid prediction length: {prediction_length}."
        # An empty list would otherwise slip through `all(...)` and fail later
        # with an opaque IndexError when the last element is read.
        assert decoder_mlp_dim_seq is None or (
            len(decoder_mlp_dim_seq) > 0
            and all(d > 0 for d in decoder_mlp_dim_seq)
        ), "`decoder_mlp_dim_seq` should be non-empty with all elements > 0"
        assert channels_seq is None or all(
            d > 0 for d in channels_seq
        ), "Elements of `channels_seq` should be > 0"
        assert dilation_seq is None or all(
            d > 0 for d in dilation_seq
        ), "Elements of `dilation_seq` should be > 0"
        assert kernel_size_seq is None or all(
            d > 0 for d in kernel_size_seq
        ), "Elements of `kernel_size_seq` should be > 0"
        assert quantiles is None or all(
            0 <= d <= 1 for d in quantiles
        ), "Elements of `quantiles` should be >= 0 and <= 1"

        self.decoder_mlp_dim_seq = (
            decoder_mlp_dim_seq if decoder_mlp_dim_seq is not None else [30]
        )
        self.channels_seq = (
            channels_seq if channels_seq is not None else [30, 30, 30]
        )
        self.dilation_seq = (
            dilation_seq if dilation_seq is not None else [1, 3, 9]
        )
        self.kernel_size_seq = (
            kernel_size_seq if kernel_size_seq is not None else [7, 3, 3]
        )
        # The default quantile grid is only applied when neither `quantiles`
        # nor `distr_output` was given; with a `distr_output` this stays None.
        self.quantiles = (
            quantiles
            if (quantiles is not None) or (distr_output is not None)
            else [0.025, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.975]
        )
        self.is_iqf = is_iqf

        # Checked after defaults are filled in so that a partially specified
        # CNN configuration is compared against the default sequences.
        assert (
            len(self.channels_seq)
            == len(self.dilation_seq)
            == len(self.kernel_size_seq)
        ), (
            f"mismatch CNN configurations: {len(self.channels_seq)} vs. "
            f"{len(self.dilation_seq)} vs. {len(self.kernel_size_seq)}"
        )

        if seed is not None:
            np.random.seed(seed)
            mx.random.seed(seed, trainer.ctx)

        # `use_static_feat` and `use_dynamic_feat` always True because network
        # always receives input; either from the input data or constants
        encoder = HierarchicalCausalConv1DEncoder(
            dilation_seq=self.dilation_seq,
            kernel_size_seq=self.kernel_size_seq,
            channels_seq=self.channels_seq,
            use_residual=use_residual,
            use_static_feat=True,
            use_dynamic_feat=True,
            prefix="encoder_",
        )

        # Last entry is the decoder's final dimension, the rest are hidden
        # layers.
        decoder = ForkingMLPDecoder(
            dec_len=prediction_length,
            final_dim=self.decoder_mlp_dim_seq[-1],
            hidden_dimension_sequence=self.decoder_mlp_dim_seq[:-1],
            prefix="decoder_",
        )

        # No quantiles (None or empty) means no quantile output is built.
        if not self.quantiles:
            quantile_output = None
        elif is_iqf:
            quantile_output = IncrementalQuantileOutput(self.quantiles)
        else:
            quantile_output = QuantileOutput(self.quantiles)

        super().__init__(
            encoder=encoder,
            decoder=decoder,
            quantile_output=quantile_output,
            distr_output=distr_output,
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            use_past_feat_dynamic_real=use_past_feat_dynamic_real,
            use_feat_dynamic_real=use_feat_dynamic_real,
            use_feat_static_cat=use_feat_static_cat,
            enable_encoder_dynamic_feature=enable_encoder_dynamic_feature,
            enable_decoder_dynamic_feature=enable_decoder_dynamic_feature,
            cardinality=cardinality,
            embedding_dimension=embedding_dimension,
            add_time_feature=add_time_feature,
            add_age_feature=add_age_feature,
            trainer=trainer,
            scaling=scaling,
            scaling_decoder_dynamic_feature=scaling_decoder_dynamic_feature,
            num_forking=num_forking,
            max_ts_len=max_ts_len,
            train_sampler=train_sampler,
            validation_sampler=validation_sampler,
            batch_size=batch_size,
        )

    @classmethod
    def derive_auto_fields(cls, train_iter):
        """
        Derive constructor arguments from the statistics of ``train_iter``.

        Returns a dict with the keys ``use_past_feat_dynamic_real``,
        ``use_feat_dynamic_real``, ``use_feat_static_cat``, ``cardinality``
        and ``max_ts_len``, computed with ``calculate_dataset_statistics``.
        """
        stats = calculate_dataset_statistics(train_iter)

        return {
            "use_past_feat_dynamic_real": stats.num_past_feat_dynamic_real > 0,
            "use_feat_dynamic_real": stats.num_feat_dynamic_real > 0,
            "use_feat_static_cat": bool(stats.feat_static_cat),
            "cardinality": [len(cats) for cats in stats.feat_static_cat],
            "max_ts_len": stats.max_target_length,
        }

    @classmethod
    def from_inputs(cls, train_iter, **params):
        """
        Build an estimator from user parameters and values inferred from data.

        The fields returned by :meth:`derive_auto_fields` are merged with
        ``params``, with ``params`` taking precedence. A ``use_*`` flag that
        the user enabled is reset to False (with a warning) when the
        corresponding field is absent from the data. ``cardinality`` is set
        to None whenever ``use_feat_static_cat`` ends up disabled by this
        reconciliation.

        Parameters
        ----------
        train_iter
            Training dataset used to infer the automatic fields.
        **params
            Hyperparameters forwarded to ``from_hyperparameters``. The
            ``use_*`` flags may be given as booleans or as strings understood
            by ``strtobool``.
        """
        logger = logging.getLogger(__name__)
        logger.info(
            f"gluonts[from_inputs]: User supplied params set to {params}"
        )
        # auto_params usually include `use_feat_dynamic_real`,
        # `use_past_feat_dynamic_real`, `use_feat_static_cat` and
        # `cardinality`
        auto_params = cls.derive_auto_fields(train_iter)

        fields = [
            "use_feat_dynamic_real",
            "use_past_feat_dynamic_real",
            "use_feat_static_cat",
        ]
        # A feature requested by the user is only honoured when the data
        # actually contains it.
        for field in fields:
            if field not in params:
                continue

            is_params_field = (
                params[field]
                if isinstance(params[field], bool)
                else strtobool(params[field])
            )
            if is_params_field and not auto_params[field]:
                logger.warning(
                    f"gluonts[from_inputs]: {field} set to False since it"
                    " is not present in the data."
                )
                params[field] = False
                if field == "use_feat_static_cat":
                    params["cardinality"] = None
            elif (
                field == "use_feat_static_cat"
                and not is_params_field
                and auto_params[field]
            ):
                # The user opted out of static categorical features although
                # the data has them: drop the inferred cardinality as well.
                params["cardinality"] = None

        # user specified 'params' will take precedence:
        params = {**auto_params, **params}
        logger.info(
            "gluonts[from_inputs]: use_past_feat_dynamic_real set to"
            f" '{params['use_past_feat_dynamic_real']}', use_feat_dynamic_real"
            f" set to '{params['use_feat_dynamic_real']}', and"
            f" use_feat_static_cat set to '{params['use_feat_static_cat']}'"
            f" with cardinality of '{params['cardinality']}'"
        )
        return cls.from_hyperparameters(**params)


class MQRNNEstimator(ForkingSeq2SeqEstimator):
    """
    An :class:`MQDNNEstimator` with a Recurrent Neural Network (RNN) as an
    encoder and a multi-quantile MLP as a decoder.

    Implements the MQ-RNN Forecaster, proposed in [WTN+17]_.

    Note that MQRNN uses ValidationSplitSampler as its default
    train_sampler. If context_length is less than the length of the input
    time series, only one example will be used for training.

    The encoder is a fixed single-layer bidirectional GRU with hidden size 50.

    Parameters
    ----------
    prediction_length
        Length of the prediction, also known as 'horizon'. Must be > 0.
    freq
        Time granularity of the data.
    context_length
        Number of time units that condition the predictions, also known as
        'lookback period'. (default: None)
    decoder_mlp_dim_seq
        The dimensionalities of the Multi Layer Perceptron layers of the
        decoder. Must be non-empty: the last entry is the decoder's final
        dimension, the preceding ones are its hidden layers. (default: [30])
    trainer
        The GluonTS trainer to use for training. (default: Trainer())
    quantiles
        The list of quantiles that will be optimized for, and predicted by, the
        model. Each value must lie in [0, 1].
        (default: [0.025, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.975]
        when `distr_output` is not set)
    distr_output
        DistributionOutput to use. Only one of `quantiles` and
        `distr_output` can be set. (default: None)
    scaling
        Whether to automatically scale the target values. (default: None)
    scaling_decoder_dynamic_feature
        Whether to automatically scale the dynamic features for the decoder.
        (default: False)
    num_forking
        Decides how much forking to do in the decoder. (default: None)
    is_iqf
        Determines whether to use IQF (``IncrementalQuantileOutput``) or QF
        (``QuantileOutput``). (default: True)
    batch_size
        The size of the batches to be used for training and prediction.
        (default: 32)
    train_sampler
        Controls the sampling of windows during training.
    validation_sampler
        Controls the sampling of windows during validation.
    """

    @validated()
    def __init__(
        self,
        prediction_length: int,
        freq: str,
        context_length: Optional[int] = None,
        decoder_mlp_dim_seq: Optional[List[int]] = None,
        trainer: Trainer = Trainer(),
        quantiles: Optional[List[float]] = None,
        distr_output: Optional[DistributionOutput] = None,
        scaling: Optional[bool] = None,
        scaling_decoder_dynamic_feature: bool = False,
        num_forking: Optional[int] = None,
        is_iqf: bool = True,
        batch_size: int = 32,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
    ) -> None:
        # Same exclusivity rule as MQCNNEstimator: a quantile output and a
        # distribution output cannot both drive the decoder.
        assert (distr_output is None) or (
            quantiles is None
        ), "Only one of `quantiles` and `distr_output` can be set"
        assert (
            prediction_length > 0
        ), f"Invalid prediction length: {prediction_length}."
        # An empty list would otherwise slip through `all(...)` and fail later
        # with an opaque IndexError when the last element is read.
        assert decoder_mlp_dim_seq is None or (
            len(decoder_mlp_dim_seq) > 0
            and all(d > 0 for d in decoder_mlp_dim_seq)
        ), "`decoder_mlp_dim_seq` should be non-empty with all elements > 0"
        assert quantiles is None or all(
            0 <= d <= 1 for d in quantiles
        ), "Elements of `quantiles` should be >= 0 and <= 1"

        self.decoder_mlp_dim_seq = (
            decoder_mlp_dim_seq if decoder_mlp_dim_seq is not None else [30]
        )
        # The default quantile grid is only applied when neither `quantiles`
        # nor `distr_output` was given; with a `distr_output` this stays None.
        self.quantiles = (
            quantiles
            if (quantiles is not None) or (distr_output is not None)
            else [0.025, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.975]
        )
        self.is_iqf = is_iqf

        # `use_static_feat` and `use_dynamic_feat` always True because network
        # always receives input; either from the input data or constants
        encoder = RNNEncoder(
            mode="gru",
            hidden_size=50,
            num_layers=1,
            bidirectional=True,
            prefix="encoder_",
            use_static_feat=True,
            use_dynamic_feat=True,
        )

        # Last entry is the decoder's final dimension, the rest are hidden
        # layers.
        decoder = ForkingMLPDecoder(
            dec_len=prediction_length,
            final_dim=self.decoder_mlp_dim_seq[-1],
            hidden_dimension_sequence=self.decoder_mlp_dim_seq[:-1],
            prefix="decoder_",
        )

        # No quantiles (None or empty) means no quantile output is built.
        if not self.quantiles:
            quantile_output = None
        elif is_iqf:
            quantile_output = IncrementalQuantileOutput(self.quantiles)
        else:
            quantile_output = QuantileOutput(self.quantiles)

        super().__init__(
            encoder=encoder,
            decoder=decoder,
            quantile_output=quantile_output,
            distr_output=distr_output,
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            scaling=scaling,
            scaling_decoder_dynamic_feature=scaling_decoder_dynamic_feature,
            num_forking=num_forking,
            train_sampler=train_sampler,
            validation_sampler=validation_sampler,
            batch_size=batch_size,
        )