# Source code for gluonts.zebras._freq

# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Tuple, Optional

import numpy as np

from gluonts import maybe

NpFreq = Tuple[str, int]

# Offset of each weekday anchor relative to Monday (``MON`` -> 0, ...,
# ``SUN`` -> 6), keyed by the upper-case three-letter abbreviation.
weekday_offsets = {
    day: offset
    for offset, day in enumerate(
        ("MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN")
    )
}


def _canonical_freqstr(n: int, name: str, suffix: Optional[str] = None) -> str:
    """
    Build the canonical string form of a frequency.

    A multiple of one is left implicit, and a non-empty ``suffix`` is joined
    to ``name`` with a dash:

    >>> _canonical_freqstr(1, "X")
    'X'
    >>> _canonical_freqstr(3, "X")
    '3X'
    >>> _canonical_freqstr(3, "X", "Y")
    '3X-Y'

    Having a single spelling per frequency makes plain string comparison
    meaningful (otherwise ``"1X" != "X"``).
    """

    label = f"{name}-{suffix}" if suffix else name

    # The multiple is only spelled out when it differs from one.
    return label if n == 1 else f"{n}{label}"


# numpy datetime unit code -> pandas frequency alias.
_freq_numpy_to_pandas = {
    "Y": "Y",
    "D": "D",
    "W": "W",
    "M": "M",
    "h": "H",
    "m": "T",
    "s": "S",
}

# pandas frequency alias -> (numpy unit code, multiple).
# Start from the inverse of the table above, each with a multiple of one ...
_freq_pandas_to_numpy = {
    pandas_alias: (numpy_unit, 1)
    for numpy_unit, pandas_alias in _freq_numpy_to_pandas.items()
}
# ... then add the pandas-only aliases. The "W" entry replaces the inverted
# one on purpose, so that weekly frequencies resolve to the daily unit.
_freq_pandas_to_numpy.update(
    {
        "A": ("Y", 1),
        "AS": ("Y", 1),
        "YS": ("Y", 1),
        "MS": ("M", 1),
        "MIN": ("m", 1),
        "Q": ("M", 3),
        "QS": ("M", 3),
        "B": ("D", 1),
        "W": ("D", 1),
    }
)


def _canonical_name(name: str) -> str:
    """
    Return the preferred alias for a frequency name.

    ``"MIN"`` becomes ``"T"`` and ``"Y"`` becomes ``"A"``; every other name
    is returned unchanged.
    """
    preferred = {"MIN": "T", "Y": "A"}
    return preferred[name] if name in preferred else name


@dataclass
class Freq:
    """
    A class representing frequencies, such as n-days.

    Note: Use ``freq`` to construct instances of ``Freq``.

    We use frequency aliases from pandas over frequency names defined by numpy.
    For example, the name for minutely is either "min" or "T", while "m"
    and "M" represent monthly frequencies. In contrast numpy uses "m" for
    minutely and "M" for monthly. In addition, pandas defines some frequencies
    which do not exist in numpy, for example quarterly frequencies.

    However, internally we use ``numpy.datetime64`` objects and thus we must
    support numpy's frequency names as well. To do this we generally use base
    frequencies (multiple = 1), since numpy otherwise aligns timestamps for us
    which we don't want.

    Weekly frequency needs to be handled specially, since numpy counts the
    number of weeks since Thu Jan 1 1970 and therefore aligns weekly
    timestamps to Thursday. We instead use daily frequency internally and
    align to Monday by default (or to the weekday named by ``suffix``).

    Attributes
    ----------
    name
        Frequency alias; replaced by its canonical form on construction.
    n
        Multiple of the base frequency.
    suffix
        Optional anchor following the dash in strings such as ``"W-SUN"``.
    """

    name: str
    n: int
    suffix: Optional[str] = None

    def __post_init__(self):
        # Store only the canonical alias so that different spellings of the
        # same frequency end up with the same ``name``.
        self.name = _canonical_name(self.name)

    @property
    def __init_passed_kwargs__(self):
        """
        Keyword arguments from which an equal instance can be re-created.

        ``suffix`` is only included when set, so frequencies without an
        anchor keep the plain ``{"name", "n"}`` form.

        NOTE(review): presumably consumed by gluonts' serialization helpers;
        confirm against the caller.
        """
        kwargs = {"name": self.name, "n": self.n}
        if self.suffix is not None:
            # Without this an anchored frequency (e.g. ``W-SUN``) would be
            # rebuilt without its anchor.
            kwargs["suffix"] = self.suffix
        return kwargs

    @property
    def np_freq(self) -> "NpFreq":
        """
        The numpy ``(unit, multiple)`` pair backing this frequency.

        Raises ``KeyError`` if ``name`` has no entry in
        ``_freq_pandas_to_numpy``.
        """
        return _freq_pandas_to_numpy[self.name]

    @property
    def step(self):
        """
        Number of ``np_freq`` units spanned by one period of this frequency.
        """
        if self.name == "W":
            # Weekly frequency is stored in days, so one week is seven units.
            return self.n * 7

        return self.n

    @classmethod
    def __get_validators__(cls):
        # pydantic support
        yield freq

    @classmethod
    def from_pandas(cls, freq) -> "Freq":
        """
        Create a ``Freq`` from a pandas-style frequency.

        Parameters
        ----------
        freq
            Either a frequency string such as ``"D"``, ``"5min"`` or
            ``"W-SUN"`` (case-insensitive, surrounding whitespace ignored),
            or an object exposing a ``freqstr`` attribute.

        Raises
        ------
        ValueError
            If ``freq`` is neither a string nor has a ``freqstr`` attribute.
            A string that does not match the expected format is rejected
            through ``maybe.expect`` (presumably also a ``ValueError`` --
            confirm against ``gluonts.maybe``).
        """
        if not isinstance(freq, str):
            if hasattr(freq, "freqstr"):
                freq = freq.freqstr
            else:
                raise ValueError(f"Invalid freq {freq}: {type(freq)}")

        # The whole string has to match: with a prefix match, malformed input
        # such as "1.5H" would silently be parsed into a meaningless
        # frequency instead of being rejected.
        match = maybe.expect(
            re.fullmatch(
                r"(?P<n>\d+)?\s*(?P<freq>\w+)(?P<suffix>\-\w+)?",
                freq.strip().upper(),
            ),
            f"Unsupported freq format {freq}",
        )

        groups = match.groupdict()
        # A missing multiple means one.
        n = maybe.map_or(groups["n"], int, 1)

        suffix = groups["suffix"]
        if suffix is not None:
            # remove leading `-` from `-MON`
            suffix = suffix[1:]

        return cls(groups["freq"], n, suffix)

    def to_pandas(self):
        """
        Return the pandas offset obtained from this frequency's string form.
        """
        # Imported lazily so that pandas is only needed when this is called.
        from pandas.tseries.frequencies import to_offset

        return to_offset(str(self))

    def align(self, timestamp: np.datetime64) -> np.datetime64:
        """
        Align ``timestamp`` according to the frequency.

        For example, for daily frequency, any timestamps that fall into the
        same day align to the same value.

        Weekly frequencies are moved back to the most recent weekday named by
        ``suffix`` (Monday if there is none); an unknown ``suffix`` raises
        ``KeyError``.
        """
        name, multiple = self.np_freq
        timestamp = timestamp.astype(f"M8[{multiple}{name}]")

        if self.name == "W":
            offset = maybe.map_or(self.suffix, weekday_offsets.__getitem__, 0)
            # ``timestamp`` is in days here, so the integer value counts days
            # since 1970-01-01, a Thursday. Subtracting 4 makes the value
            # congruent (mod 7) to the weekday index with Monday = 0.
            dayofweek = timestamp.astype(int) - 4
            # Days elapsed since the anchor weekday; step back by that many.
            return timestamp - (dayofweek - offset) % 7

        return timestamp

    def shift(self, start: np.datetime64, count: int) -> np.datetime64:
        """
        Return ``start`` moved by ``count`` periods of this frequency.

        Business-day frequencies count business days via
        ``np.busday_offset``; all others add ``step * count`` to ``start``.

        NOTE(review): assumes ``start`` already uses the unit given by
        ``np_freq`` (e.g. the result of ``align``) -- confirm with callers.
        """
        if self.name == "B":
            return np.busday_offset(start, self.n * count)

        return start + self.step * count

    def range(self, start: np.datetime64, count: int) -> np.ndarray:
        """
        Return an array of period starts beginning at ``start``.

        NOTE(review): the non-business branch relies on numpy interpreting an
        integer ``stop`` relative to a ``datetime64`` ``start`` -- confirm.
        """
        if self.name == "B":
            # We first collect all days, even non business days to then filter
            # for business days, of which we then take, each n-th.
            periods = np.arange(start, np.busday_offset(start, count * self.n))
            periods = periods[np.is_busday(periods)]
            return periods[:: self.n]

        return np.arange(start, count * self.step, self.step)

    def __str__(self) -> str:
        return _canonical_freqstr(self.n, self.name, self.suffix)


def freq(arg) -> "Freq":
    """
    Coerce ``arg`` into a ``Freq``.

    An existing ``Freq`` is returned as is (not copied); any other value is
    passed to ``Freq.from_pandas``, which accepts frequency strings and
    objects exposing ``freqstr`` and raises for anything else.
    """
    if isinstance(arg, Freq):
        return arg

    return Freq.from_pandas(arg)