Source code for gluonts.nursery.spliced_binned_pareto.data_functions

# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

from typing import List

import numpy as np
from scipy import stats
import torch


[docs]def add_spikes(ts: torch.Tensor, only_upper_spikes: bool = False): """ Adds spikes to 15% of the time series in the form of heavy-tailed (Generalized Pareto) realizations Arguments: ts: time series only_upper_spikes: boolean to indicate upper-tailed or two-tailed spikes """ num_spikes = int(0.15 * ts.shape[0]) indices_for_gp_spikes = np.random.choice( np.arange(len(ts)), replace=False, size=num_spikes ) spike_direction = np.random.choice([-1, 1], replace=True, size=num_spikes) if only_upper_spikes: spike_direction = np.ones_like(spike_direction) spikes = stats.genpareto(1 / 50).rvs(num_spikes) * spike_direction ts[indices_for_gp_spikes] += spikes return ts
[docs]def create_ds( num_points: int, t_dof: int = 10, noise_mult: float = 0.25, points_per_sinusoid: int = 100, magnitude_sin: float = 1, ): """ Creates noisy sinusoid. (Noise distributed as student t with degrees of freedom = t_dof.) Returns tensor of shape (1, 1, num_points). Arguments: num_points: int, number of points in the dataset. t_dof: int, degrees of freedom for student t distribution. noise_mult: float, standard deviation. points_per_sinusoid: int, datapoints per sine period magnitude_sin: float, magnitude of sine amplitude """ end = num_points / points_per_sinusoid * 2 * np.pi sinusoid = np.sin(np.linspace(0, end, num_points)) * magnitude_sin noise = np.random.standard_t(t_dof, num_points) * noise_mult values = sinusoid + noise + 5.0 values = add_spikes(values) return torch.tensor(values.reshape(1, 1, -1))
[docs]def create_ds_iid(num_points: int, noise_mult: float = 0.25): """Creates heavy-tailed gaussian iid. Returns tensor of shape (1, 1, num_points). Arguments: num_points: int, number of points in the dataset. noise_mult: float, standard deviation """ values = np.random.normal(0, noise_mult, size=num_points) values = add_spikes(values) return torch.tensor(values.reshape(1, 1, -1))
[docs]def add_spikes_asymmetric( ts: torch.Tensor, xi: List[float] = [1 / 50.0, 1 / 25.0] ): """ Adds spikes to 15% of the time series in the form of heavy-tailed (Generalized Pareto) realizations Arguments: ts: time series xi: [float, float], GenPareto heaviness parameter for [lower, upper] noise respectively """ num_spikes = int(0.15 * ts.shape[0]) half_num_spikes = [int(num_spikes / 2)] half_num_spikes.append(num_spikes - half_num_spikes[0]) spike_direction = [-1, 1] indices_for_gp_spikes = np.random.choice( np.arange(len(ts)), replace=False, size=num_spikes ) idx = 0 spikes = ( stats.genpareto(xi[idx]).rvs(half_num_spikes[idx]) * spike_direction[idx] ) ts[indices_for_gp_spikes[: half_num_spikes[0]]] += spikes idx = 1 spikes = ( stats.genpareto(xi[idx]).rvs(half_num_spikes[idx]) * spike_direction[idx] ) ts[indices_for_gp_spikes[half_num_spikes[0] :]] += spikes return ts
[docs]def create_ds_asymmetric( num_points: int, t_dof: List[float] = [10, 10], noise_mult: List[float] = [0.25, 0.25], xi: List[float] = [1 / 50.0, 1 / 25.0], points_per_sinusoid: int = 100, magnitude_sin: float = 1, ): """ Creates noisy sinusoid. (Noise distributed as student t with degrees of freedom = t_dof.) Returns tensor of shape (1, 1, num_points). Arguments: num_points: int, number of points in the dataset. t_dof: [int, int], degrees of freedom for Students'-t distribution for [lower, upper] noise respectively. noise_mult: [float, float], standard deviation for [lower, upper] noise respectively. points_per_sinusoid: int, datapoints per sine period magnitude_sin: float, magnitude of sine amplitude """ end = num_points / points_per_sinusoid * 2 * np.pi sinusoid = np.sin(np.linspace(0, end, num_points)) * magnitude_sin idx = 0 noise_lower = ( np.random.standard_t(t_dof[idx], num_points) * noise_mult[idx] ) noise_lower = np.minimum(0, noise_lower) idx = 1 noise_upper = ( np.random.standard_t(t_dof[idx], num_points) * noise_mult[idx] ) noise_upper = np.maximum(0, noise_upper) values = sinusoid + noise_lower + noise_upper + 5.0 values = add_spikes_asymmetric(values, xi=xi) return torch.tensor(values.reshape(1, 1, -1))