# Source code for gluonts.zebras._period

# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

"""
``zebras.Period`` and ``zebras.Periods`` are classes to track points in time
with periodicity. They provide similar functionality to ``pandas.Period`` and
``pandas.PeriodIndex`` but offer some advantages:

* more consistent API
* improved ergonomics
* better performance

Both classes are just thin layers around ``numpy.datetime64`` objects, but
provide a more usable interface.

Both classes have easier to use factory-functions in ``zebras.period`` and
``zebras.periods``, akin to how ``numpy.ndarray`` objects are usually
constructed using ``numpy.array``.

While ``zebras.Period`` represents a single time-stamp, ``zebras.Periods`` are
a set of equidistant timestamps where the gap between consecutive timestamps
is the period.


```py
first = zb.period("2020-01", "3M")
index = zb.periods("2020-01", "3M", 3)

first.periods(3) == index
first == index[0]
```
"""

from __future__ import annotations

import datetime
import functools
from dataclasses import dataclass
from typing import Any, Union, Optional, Tuple, cast, overload

import numpy as np
from dateutil.parser import parse as du_parse

from gluonts.core import serde

from ._freq import Freq


def _is_number(value):
    return isinstance(value, (int, np.integer))


class _BasePeriod:
    data: Any
    freq: Freq

    @property
    def freqstr(self) -> str:
        return str(self.freq)

    @property
    def year(self) -> np.ndarray:
        return self.data.astype("M8[Y]").astype(int) + 1970

    @property
    def month(self) -> np.ndarray:
        return self.data.astype("M8[M]").astype(int) % 12 + 1

    @property
    def day(self) -> np.ndarray:
        return (self.data.astype("M8[D]") - self.data.astype("M8[M]")).astype(
            int
        ) + 1

    @property
    def hour(self) -> np.ndarray:
        return (self.data.astype("M8[h]") - self.data.astype("M8[D]")).astype(
            int
        )

    @property
    def minute(self) -> np.ndarray:
        return (self.data.astype("M8[m]") - self.data.astype("M8[h]")).astype(
            int
        )

    @property
    def second(self) -> np.ndarray:
        return (self.data.astype("M8[s]") - self.data.astype("M8[m]")).astype(
            int
        )

    @property
    def dayofweek(self) -> np.ndarray:
        return (self.data.astype("M8[D]").astype(int) - 4) % 7

    @property
    def dayofyear(self) -> np.ndarray:
        return (self.data.astype("M8[D]") - self.data.astype("M8[Y]")).astype(
            int
        ) + 1

    @property
    def week(self) -> np.ndarray:
        # Note: In Python 3.9 `isocalendar()` returns a named tuple, but we
        # need to support 3.7 and 3.8, so we use index one for the week.
        return np.array(
            [
                cal.isocalendar()[1]
                for cal in self.data.astype(datetime.datetime)
            ]
        )

    def __add__(self, other):
        if _is_number(other):
            return self.__class__(
                self.freq.shift(self.data, other),
                self.freq,
            )

    def __sub__(self, other):
        if _is_number(other):
            return self.__class__(
                self.freq.shift(self.data, -other),
                self.freq,
            )

        else:
            return self.data - other.data

        raise ValueError(other)


@functools.total_ordering
@dataclass
class Period(_BasePeriod):
    """
    A single point in time together with its frequency.

    ``data`` holds the timestamp as a ``numpy.datetime64`` scalar and
    ``freq`` the associated frequency. Equality is generated by
    ``dataclass``; the remaining comparisons are derived from ``__lt__`` via
    ``functools.total_ordering``.
    """

    data: np.datetime64
    freq: Freq

    @property
    def __init_passed_kwargs__(self) -> dict:
        # Constructor arguments of this instance.
        # NOTE(review): presumably consumed by ``gluonts.core.serde`` --
        # confirm against the serde implementation.
        return {"data": self.data, "freq": self.freq}

    def periods(self, count: int):
        """Return ``Periods`` with ``count`` entries starting at this one."""
        return Periods(
            self.freq.range(self.data, count),
            self.freq,
        )

    def to_pandas(self):
        """Convert to a ``pandas.Period``."""
        # Imported lazily so pandas is only needed when this is called.
        import pandas as pd

        return pd.Period(self.data.astype(object), self.freq.to_pandas())

    def to_timestamp(self):
        """Return the value as the Python object ``astype(object)`` yields."""
        return self.data.astype(object)

    def unix_epoch(self) -> int:
        """Return the timestamp as seconds since 1970-01-01."""
        return self.to_numpy().astype("M8[s]").astype(int)

    def __repr__(self) -> str:
        return f"Period<{self.data}, {self.freq}>"

    def __lt__(self, other: Period) -> bool:
        if not isinstance(other, Period):
            return NotImplemented

        # ``typing.cast`` does nothing at runtime, so ``bool`` is called to
        # really convert numpy.bool_ into bool.
        return bool(self.data < other.data)

    def to_numpy(self) -> np.datetime64:
        """Return the underlying ``numpy.datetime64`` scalar."""
        return self.data

    @property
    def shape(self) -> Tuple[int, ...]:
        return self.data.shape

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        # NumPy requires ``__array__`` to return an ``ndarray``, so the
        # scalar is wrapped into a 0-d array. A fresh array is always built;
        # ``copy`` is accepted only to satisfy the protocol signature.
        return np.array(self.data, dtype=dtype)


@dataclass
class Periods(_BasePeriod):
    """
    A sequence of timestamps sharing one frequency.

    ``data`` is a ``numpy.ndarray`` of ``datetime64`` values and ``freq`` the
    frequency they share. Integer indexing yields a ``Period``, any other
    index yields ``Periods``.
    """

    data: np.ndarray
    freq: Freq

    @property
    def start(self) -> Period:
        """First timestamp."""
        return self[0]

    @property
    def end(self) -> Period:
        """
        Last timestamp.

        >>> p = periods("2021", "D", 365)
        >>> assert p.end == period("2021-12-31", "D")
        """

        return self[-1]

    def head(self, count: int) -> Periods:
        """
        First ``count`` timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.head(5) == periods("2021-01-01", "D", 5)
        """

        return self[:count]

    def tail(self, count: int) -> Periods:
        """
        Last ``count`` timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.tail(5) == periods("2021-12-27", "D", 5)
        """

        # ``self[-0:]`` is the whole index, so zero needs its own branch to
        # produce an empty result.
        if count == 0:
            return self[:0]

        return self[-count:]

    def future(self, count: int) -> Periods:
        """
        Next ``count`` timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.future(5) == periods("2022-01-01", "D", 5)
        """
        return (self.end + 1).periods(count)

    def past(self, count: int) -> Periods:
        """
        Previous ``count`` timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.past(5) == periods("2020-12-27", "D", 5)
        """

        return (self.start - count).periods(count)

    def prepend(self, count: int) -> Periods:
        """
        Copy which contains ``count`` past timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.prepend(5) == periods("2020-12-27", "D", 370)
        """
        return Periods(
            np.concatenate([self.past(count).data, self.data]),
            self.freq,
        )

    def extend(self, count: int) -> Periods:
        """
        Copy which contains ``count`` future timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.extend(5) == periods("2021", "D", 370)
        """
        return Periods(
            np.concatenate([self.data, self.future(count).data]),
            self.freq,
        )

    def to_pandas(self):
        """Convert to a ``pandas.PeriodIndex``."""
        # Imported lazily so pandas is only needed when this is called.
        import pandas as pd

        # older versions of pandas expect ns-datetime64
        return pd.PeriodIndex(
            self.data.astype("M8[ns]"), freq=self.freq.to_pandas()
        )

    @classmethod
    def from_pandas(cls, index):
        """
        Turn ``pandas.PeriodIndex`` or ``pandas.DatetimeIndex`` into
        ``Periods``.
        """

        import pandas as pd

        if isinstance(index, pd.DatetimeIndex):
            index = index.to_period()
        else:
            assert isinstance(index, pd.PeriodIndex)

        freq = Freq.from_pandas(index.freqstr)
        # ``asi8`` holds the integer ordinals of the index; they are
        # reinterpreted as ``datetime64`` of the unit given by ``freq``.
        np_index = np.array(index.asi8, dtype=f"M8[{freq.np_freq[0]}]")
        # The index has to be equidistant with a gap of ``freq.n``.
        assert np.all(np.diff(np_index).astype(int) == freq.n)

        return Periods(np_index, freq)

    def intersection(self, other):
        """Return the entries of ``data`` that also occur in ``other``."""
        # TODO: Is this needed?
        # ``np.isin`` is the replacement for ``np.in1d``, which is
        # deprecated in NumPy 2; for 1-d input both give the same mask.
        return self.data[np.isin(self.data, other)]

    def index_of(self, period: Union[str, Period]):
        """
        Return the index of ``period``

        >>> p = periods("2021", "D", 365)
        >>> assert p.index_of(period("2021-02-01", "D")) == 31
        """

        if isinstance(period, str):
            period = Period(
                np.datetime64(du_parse(period), self.freq.np_freq), self.freq
            )

        # Distance to the first entry, measured in steps of the frequency.
        idx = (period - self.start).astype(int) // self.freq.step
        # NOTE(review): the upper bound is inclusive, so ``len(self)`` (one
        # past the last entry) is accepted -- confirm whether callers rely
        # on that, e.g. for slice end positions.
        assert 0 <= idx <= len(self), idx

        return idx

    def __len__(self):
        return len(self.data)

    @overload
    def __getitem__(self, idx: int) -> Period: ...

    @overload
    def __getitem__(self, idx: slice) -> Periods: ...

    def __getitem__(self, idx):
        if _is_number(idx):
            return Period(self.data[idx], self.freq)

        return Periods(self.data[idx], self.freq)

    def __eq__(self, other):
        if not isinstance(other, Periods):
            return False

        if len(self) != len(other):
            return False

        # An empty index has no ``start`` to compare (``self[0]`` would
        # raise ``IndexError``), so only the frequency is left to check.
        if len(self) == 0:
            return self.freq == other.freq

        # Same length and same first period (value and frequency).
        return self.start == other.start

    def to_numpy(self) -> np.ndarray:
        """Return the underlying ``numpy.ndarray``."""
        return self.data

    @property
    def shape(self) -> Tuple[int, ...]:
        return self.data.shape

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        # ``dtype`` and ``copy`` are part of NumPy's ``__array__`` protocol;
        # with both left at ``None`` the stored array is returned as is.
        if dtype is not None and np.dtype(dtype) != self.data.dtype:
            if copy is False:
                raise ValueError(
                    "Unable to avoid copy while converting the dtype."
                )
            return self.data.astype(dtype)

        if copy:
            return self.data.copy()

        return self.data

    def unix_epoch(self) -> np.ndarray:
        """Return the timestamps as seconds since 1970-01-01."""
        return self.to_numpy().astype("M8[s]").astype(int)


@serde.encode.register
def _encode_zebras_periods(v: Periods):
    """
    Encode ``Periods`` for ``serde`` as an instance of
    ``gluonts.zebras.periods`` with ``start``, ``freq`` and ``count`` as
    keyword arguments.
    """
    factory_kwargs = {"start": v.start, "freq": str(v.freq), "count": len(v)}

    encoded = {"__kind__": "instance", "class": "gluonts.zebras.periods"}
    encoded["kwargs"] = serde.encode(factory_kwargs)
    return encoded


def period(
    data: Union[Period, str], freq: Optional[Union[Freq, str]] = None
) -> Period:
    """
    Build a ``zebras.Period`` from a timestamp and a frequency.

    Parameters
    ----------
    data
        The point in time, given as a string (e.g., "2023-01-01") or as
        another Period object. Other values are handed to
        ``numpy.datetime64`` unchanged.
    freq, optional
        The frequency, either a ``Freq`` or a string such as "H" for hourly.
        When omitted, it is taken from ``data.freqstr`` if ``data`` has that
        attribute.

    Returns
    -------
        A ``zebras.Period`` object, aligned using ``freq.align``.

    Raises
    ------
    ValueError
        If no frequency can be determined, or ``freq`` is neither a ``Freq``
        nor a string.
    """
    # Step 1: settle on a ``Freq`` object.
    if freq is None:
        if not hasattr(data, "freqstr"):
            raise ValueError("No frequency specified.")
        freq = Freq.from_pandas(data.freqstr)
    elif not isinstance(freq, Freq):
        if not isinstance(freq, str):
            raise ValueError(f"Unknown frequency type {type(freq)}.")
        freq = Freq.from_pandas(freq)

    # Step 2: turn ``data`` into something ``numpy.datetime64`` accepts.
    if isinstance(data, Period):
        timestamp = data.data
    elif isinstance(data, str):
        # Fields missing from the string fall back to the epoch, and any
        # timezone information is dropped.
        epoch = datetime.datetime(1970, 1, 1)
        timestamp = du_parse(data, default=epoch, ignoretz=True)
    else:
        # TODO: should we add a check?
        timestamp = data

    aligned = freq.align(np.datetime64(timestamp, freq.np_freq))
    return Period(aligned, freq)


def periods(
    start: Union[Period, str], freq: Union[Freq, str], count: int
) -> Periods:
    """
    Create a ``zebras.Periods`` object that represents multiple consecutive
    periods of time.

    Parameters
    ----------
    start
        The starting time period represented by a string (e.g., "2023-01-01"),
        or another Period object.
    freq
        The frequency of the period, e.g, "H" for hourly.
    count
        The number of periods.

    Returns
    -------
        A ``zebras.Periods`` object.
    """
    # Build the first period, then expand it into ``count`` entries.
    return period(start, freq).periods(count)