Source code for gluonts.zebras._period

# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

"""
``zebras.Period`` and ``zebras.Periods`` are classes to track points in time
with periodicity. They provide similar functionality to ``pandas.Period`` and
``pandas.PeriodIndex`` but offer some advantages:

* more consistent API
* improved ergonomics
* better performance

Both classes are just thin layers around ``numpy.datetime64`` objects, but
provide a more usable interface.

Both classes have easier to use factory-functions in ``zebras.period`` and
``zebras.periods``, akin to how ``numpy.ndarray`` objects are usually
constructed using ``numpy.array``.

While ``zebras.Period`` represents a single time-stamp, ``zebras.Periods`` are
a set of equidistant timestamps where the gap between consecutive timestamps
is the period.


```py
first = zb.period("2020-01", "3M")
index = zb.periods("2020-01", "3M", 3)

first.periods(3) == index
first == index[0]
```
"""

from __future__ import annotations

import datetime
import functools
from dataclasses import dataclass
from typing import Any, Union, Optional, Tuple, cast, overload

import numpy as np
from dateutil.parser import parse as du_parse

from gluonts.core import serde

from ._freq import Freq


def _is_number(value):
    """
    Tell whether ``value`` is an integer.

    Both Python ``int`` (which includes ``bool``) and NumPy integer scalars
    are accepted; floats and everything else are not.
    """
    integer_types = (int, np.integer)
    return isinstance(value, integer_types)


class _BasePeriod:
    """
    Behaviour shared by ``Period`` and ``Periods``.

    Subclasses supply ``data`` (``numpy.datetime64`` values) and ``freq``.
    The calendar properties are computed purely from ``data`` by casting
    between ``datetime64`` units, so they work element-wise on arrays and
    on scalars alike.
    """

    data: Any
    freq: Freq

    @property
    def freqstr(self) -> str:
        """String form of ``freq``."""
        return str(self.freq)

    @property
    def year(self) -> np.ndarray:
        """Calendar year."""
        # ``M8[Y]`` counts whole years since 1970.
        return self.data.astype("M8[Y]").astype(int) + 1970

    @property
    def month(self) -> np.ndarray:
        """Month of the year, 1 to 12."""
        # ``M8[M]`` counts whole months since 1970-01.
        return self.data.astype("M8[M]").astype(int) % 12 + 1

    @property
    def day(self) -> np.ndarray:
        """Day of the month, starting at 1."""
        return (self.data.astype("M8[D]") - self.data.astype("M8[M]")).astype(
            int
        ) + 1

    @property
    def hour(self) -> np.ndarray:
        """Hour of the day, starting at 0."""
        return (self.data.astype("M8[h]") - self.data.astype("M8[D]")).astype(
            int
        )

    @property
    def minute(self) -> np.ndarray:
        """Minute of the hour, starting at 0."""
        return (self.data.astype("M8[m]") - self.data.astype("M8[h]")).astype(
            int
        )

    @property
    def second(self) -> np.ndarray:
        """Second of the minute, starting at 0."""
        return (self.data.astype("M8[s]") - self.data.astype("M8[m]")).astype(
            int
        )

    @property
    def dayofweek(self) -> np.ndarray:
        """Day of the week, with Monday as 0 and Sunday as 6."""
        # Day 0 of the epoch (1970-01-01) was a Thursday, hence the offset.
        return (self.data.astype("M8[D]").astype(int) - 4) % 7

    @property
    def dayofyear(self) -> np.ndarray:
        """Day of the year, starting at 1."""
        return (self.data.astype("M8[D]") - self.data.astype("M8[Y]")).astype(
            int
        ) + 1

    @property
    def week(self) -> np.ndarray:
        """
        ISO week number as reported by ``isocalendar()``.

        The result has the same shape as ``data``; for scalar ``data`` a
        zero-dimensional array is returned.
        """
        # The week only depends on the date. Casting to day precision first
        # guarantees that the object conversion yields ``datetime.date``
        # values regardless of the precision of ``data``, and
        # ``atleast_1d`` makes scalar ``data`` iterable.
        days = np.atleast_1d(self.data).astype("M8[D]")

        # Note: In Python 3.9 `isocalendar()` returns a named tuple, but we
        # need to support 3.7 and 3.8, so we use index one for the week.
        weeks = np.array(
            [day.isocalendar()[1] for day in days.ravel().astype(object)],
            dtype=int,
        )
        return weeks.reshape(np.shape(self.data))

    def __add__(self, other):
        """
        Shift forward by ``other`` steps of ``freq``.

        Returns a new instance of the same class.

        Raises
        ------
        ValueError
            If ``other`` is not an integer.
        """
        if not _is_number(other):
            # Fail loudly instead of implicitly returning ``None``.
            raise ValueError(other)

        return self.__class__(
            self.freq.shift(self.data, other),
            self.freq,
        )

    def __sub__(self, other):
        """
        Shift backward by an integer, or take the difference to another
        period object.

        With an integer ``other`` a new instance of the same class is
        returned. With another ``_BasePeriod`` the raw difference of the two
        ``data`` values is returned.

        Raises
        ------
        ValueError
            If ``other`` is neither an integer nor a ``_BasePeriod``.
        """
        if _is_number(other):
            return self.__class__(
                self.freq.shift(self.data, -other),
                self.freq,
            )

        if isinstance(other, _BasePeriod):
            return self.data - other.data

        raise ValueError(other)


@functools.total_ordering
@dataclass
class Period(_BasePeriod):
    """
    A single point in time together with its frequency.

    ``data`` holds the timestamp as a ``numpy.datetime64`` scalar and
    ``freq`` the associated ``Freq``. Equality is generated by ``dataclass``;
    the remaining comparisons are derived from ``__lt__`` through
    ``functools.total_ordering``.
    """

    data: np.datetime64
    freq: Freq

    @property
    def __init_passed_kwargs__(self) -> dict:
        """Constructor arguments as a dict with keys ``data`` and ``freq``."""
        return {"data": self.data, "freq": self.freq}

    def periods(self, count: int):
        """
        Build a ``Periods`` from ``freq.range(data, count)``, using this
        period as the starting point and keeping ``freq``.
        """
        return Periods(
            self.freq.range(self.data, count),
            self.freq,
        )

    def to_pandas(self):
        """Convert to a ``pandas.Period``."""
        # Imported lazily so that pandas is only needed when this is called.
        import pandas as pd

        return pd.Period(self.data.astype(object), self.freq.to_pandas())

    def to_timestamp(self):
        """Return ``data`` converted to a plain Python object."""
        return self.data.astype(object)

    def unix_epoch(self) -> int:
        """Return ``data`` as whole seconds since 1970-01-01."""
        return self.to_numpy().astype("M8[s]").astype(int)

    def __repr__(self) -> str:
        return f"Period<{self.data}, {self.freq}>"

    def __lt__(self, other: Period) -> bool:
        """Order periods by their ``data``; ``freq`` is not considered."""
        # ``typing.cast`` is a no-op at runtime, so call ``bool`` to really
        # turn the ``numpy.bool_`` result into a Python ``bool``.
        return bool(self.data < other.data)

    def to_numpy(self) -> np.datetime64:
        """Return the underlying ``numpy.datetime64`` value."""
        return self.data

    @property
    def shape(self) -> Tuple[int, ...]:
        """Shape of ``data``."""
        return self.data.shape

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """
        NumPy array protocol.

        NumPy may pass ``dtype`` (and, in newer versions, ``copy``) and
        requires an ``ndarray`` as the result, so the scalar is wrapped in a
        zero-dimensional array. ``copy`` is accepted for compatibility only;
        wrapping the scalar always produces a new array.
        """
        return np.asarray(self.data, dtype=dtype)
@dataclass
class Periods(_BasePeriod):
    """
    A sequence of timestamps that share one frequency.

    ``data`` is a ``numpy.ndarray`` of ``datetime64`` values and ``freq`` the
    associated ``Freq``. Integer indexing yields a ``Period``; any other
    index (for example a slice) yields a ``Periods``.
    """

    data: np.ndarray
    freq: Freq

    @property
    def start(self) -> Period:
        """First timestamp."""
        return self[0]

    @property
    def end(self) -> Period:
        """
        Last timestamp.

        >>> p = periods("2021", "D", 365)
        >>> assert p.end == period("2021-12-31", "D")
        """
        return self[-1]

    def head(self, count: int) -> Periods:
        """
        First ``count`` timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.head(5) == periods("2021-01-01", "D", 5)
        """
        return self[:count]

    def tail(self, count: int) -> Periods:
        """
        Last ``count`` timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.tail(5) == periods("2021-12-27", "D", 5)
        """
        if count == 0:
            # ``self[-0:]`` is ``self[0:]`` and would return everything.
            return self[:0]

        return self[-count:]

    def future(self, count: int) -> Periods:
        """
        Next ``count`` timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.future(5) == periods("2022-01-01", "D", 5)
        """
        return (self.end + 1).periods(count)

    def past(self, count: int) -> Periods:
        """
        Previous ``count`` timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.past(5) == periods("2020-12-27", "D", 5)
        """
        return (self.start - count).periods(count)

    def prepend(self, count: int) -> Periods:
        """
        Copy which contains ``count`` past timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.prepend(5) == periods("2020-12-27", "D", 370)
        """
        return Periods(
            np.concatenate([self.past(count).data, self.data]),
            self.freq,
        )

    def extend(self, count: int) -> Periods:
        """
        Copy which contains ``count`` future timestamps.

        >>> p = periods("2021", "D", 365)
        >>> assert p.extend(5) == periods("2021", "D", 370)
        """
        return Periods(
            np.concatenate([self.data, self.future(count).data]),
            self.freq,
        )

    def to_pandas(self):
        """Convert to a ``pandas.PeriodIndex``."""
        # Imported lazily so that pandas is only needed when this is called.
        import pandas as pd

        # older versions of pandas expect ns-datetime64
        return pd.PeriodIndex(
            self.data.astype("M8[ns]"), freq=self.freq.to_pandas()
        )

    @classmethod
    def from_pandas(cls, index):
        """
        Turn ``pandas.PeriodIndex`` or ``pandas.DatetimeIndex`` into
        ``Periods``.

        The index has to be equally spaced according to its frequency; this
        is checked with an ``assert``.
        """
        import pandas as pd

        if isinstance(index, pd.DatetimeIndex):
            index = index.to_period()
        else:
            assert isinstance(index, pd.PeriodIndex)

        freq = Freq.from_pandas(index.freqstr)
        np_index = np.array(index.asi8, dtype=f"M8[{freq.np_freq[0]}]")

        # Consecutive entries must be exactly ``freq.n`` units apart.
        assert np.all(np.diff(np_index).astype(int) == freq.n)

        return cls(np_index, freq)

    def intersection(self, other):
        """
        Return the entries of ``data`` that also occur in ``other``.

        Note that the result is the raw ``numpy.ndarray``, not a ``Periods``.
        """
        # TODO: Is this needed?
        # ``np.isin`` replaces the deprecated ``np.in1d``.
        return self.data[np.isin(self.data, other)]

    def index_of(self, period: Union[str, Period]):
        """
        Return the index of ``period``

        >>> p = periods("2021", "D", 365)
        >>> assert p.index_of(period("2021-02-01", "D")) == 31
        """
        if isinstance(period, str):
            period = Period(
                np.datetime64(du_parse(period), self.freq.np_freq), self.freq
            )

        idx = (period - self.start).astype(int) // self.freq.step

        # NOTE(review): ``idx == len(self)`` passes this check although it is
        # not a valid position; presumably so the result can serve as an
        # exclusive slice bound. Confirm before tightening.
        assert 0 <= idx <= len(self), idx

        return idx

    def __len__(self):
        return len(self.data)

    @overload
    def __getitem__(self, idx: int) -> Period:
        ...

    @overload
    def __getitem__(self, idx: slice) -> Periods:
        ...

    def __getitem__(self, idx):
        """Integer index gives a ``Period``, anything else a ``Periods``."""
        if _is_number(idx):
            return Period(self.data[idx], self.freq)

        return Periods(self.data[idx], self.freq)

    def __eq__(self, other):
        """
        Two ``Periods`` are equal if they have the same length and the same
        start; comparing the starts also compares ``freq``.
        """
        if not isinstance(other, Periods):
            return False

        if len(self) != len(other):
            return False

        if len(self) == 0:
            # Empty sequences have no ``start``; only ``freq`` can differ.
            return self.freq == other.freq

        return self.start == other.start

    def to_numpy(self) -> np.ndarray:
        """Return the underlying ``numpy.ndarray``."""
        return self.data

    @property
    def shape(self) -> Tuple[int, ...]:
        """Shape of ``data``."""
        return self.data.shape

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """
        NumPy array protocol.

        ``dtype`` and ``copy`` are the optional arguments NumPy may pass.
        Without them ``data`` itself is returned.
        """
        if copy:
            return np.array(self.data, dtype=dtype, copy=True)

        return np.asarray(self.data, dtype=dtype)

    def unix_epoch(self) -> np.ndarray:
        """Return ``data`` as whole seconds since 1970-01-01."""
        return self.to_numpy().astype("M8[s]").astype(int)
@serde.encode.register
def _encode_zebras_periods(v: Periods):
    """
    Encode ``Periods`` for ``serde`` as a call to ``gluonts.zebras.periods``.

    Only the start, the frequency string and the length are stored rather
    than the full array of timestamps.
    """
    factory_kwargs = {"start": v.start, "freq": str(v.freq), "count": len(v)}

    return {
        "__kind__": "instance",
        "class": "gluonts.zebras.periods",
        "kwargs": serde.encode(factory_kwargs),
    }
def period(
    data: Union[Period, str], freq: Optional[Union[Freq, str]] = None
) -> Period:
    """
    Create a ``zebras.Period`` object that represents a period of time.

    Parameters
    ----------
    data
        The point in time, given as a string (e.g., "2023-01-01") or as
        another ``Period``. Any other value is handed to
        ``numpy.datetime64`` unchanged.
    freq, optional
        The frequency of the period, e.g, "H" for hourly; either a ``Freq``
        or a string. If omitted, it is taken from ``data.freqstr``.

    Returns
    -------
        A ``zebras.Period`` object, aligned to ``freq``.

    Raises
    ------
    ValueError
        If no frequency is given and ``data`` has no ``freqstr``, or if
        ``freq`` is neither a ``Freq`` nor a string.
    """
    # Resolve the frequency first; it determines the datetime64 unit below.
    if freq is None:
        if not hasattr(data, "freqstr"):
            raise ValueError("No frequency specified.")
        freq = Freq.from_pandas(data.freqstr)
    elif not isinstance(freq, Freq):
        if not isinstance(freq, str):
            raise ValueError(f"Unknown frequency type {type(freq)}.")
        freq = Freq.from_pandas(freq)

    raw: Any
    if isinstance(data, Period):
        raw = data.data
    elif isinstance(data, str):
        # Fields missing from the string are filled in from 1970-01-01 and
        # timezone information is dropped.
        raw = du_parse(
            data,
            default=datetime.datetime(1970, 1, 1),
            ignoretz=True,
        )
    else:
        # TODO: should we add a check?
        raw = data

    stamp = np.datetime64(raw, freq.np_freq)
    return Period(freq.align(stamp), freq)
def periods(
    start: Union[Period, str], freq: Union[Freq, str], count: int
) -> Periods:
    """
    Create a ``zebras.Periods`` object that represents multiple consecutive
    periods of time.

    Parameters
    ----------
    start
        The starting time period represented by a string (e.g.,
        "2023-01-01"), or another Period object.
    freq
        The frequency of the period, e.g, "H" for hourly.
    count
        The number of periods.

    Returns
    -------
        A ``zebras.Periods`` object.
    """
    # ``Period.periods`` returns a ``Periods``, hence the return annotation.
    return period(start, freq).periods(count)