Source code for gluonts.mx.block.snmlp

# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

# Standard library imports
from typing import List, Tuple
import warnings

# Third-party imports
import mxnet as mx
from mxnet.ndarray import linalg_gemm2 as gemm2

# First-party imports
from gluonts.mx import Tensor
from gluonts.core.component import validated
from gluonts.mx.util import _broadcast_param
from gluonts.mx.activation import get_activation, get_activation_deriv
from gluonts.mx.block.sndense import SNDense


def jacobian_sn_mlp_block_bf(
    layers: List[Tuple[mx.gluon.HybridBlock, Tensor]]
) -> Tensor:
    """
    Brute-force computation of the Jacobian of an SNMLPBlock.

    The resulting Jacobian has shape
    (Batch dim1, ..., Output dim, Input dim).

    Parameters
    ----------
    layers
        A list of (layer, input) tuples, one per composing layer of the
        SNMLPBlock, pairing each layer object with the input tensor it
        received during the forward pass.

    Returns
    -------
    Tensor
        Jacobian of the SNMLPBlock evaluated at the given input.
    """
    for i, (layer, layer_input) in enumerate(layers[::-1]):
        if isinstance(layer, SNDense):
            # broadcast the weight of shape (Output dim, Input dim)
            # to (Batch dim1, ..., Output dim, Input dim)
            jac_t = _broadcast_param(
                layer.weight,
                axes=range(len(layer_input.shape[:-1])),
                sizes=layer_input.shape[:-1],
            )
            if i == 0:
                jac = jac_t
            else:
                jac = gemm2(jac, jac_t)
        else:
            # act_deriv has shape (Batch dim1, ..., Input dim)
            act_deriv = get_activation_deriv(layer)(mx.ndarray, layer_input)
            # expand it to (Batch dim1, ..., Output dim, Input dim)
            # so that it matches the shape of the running Jacobian
            jac_t = act_deriv.expand_dims(len(jac.shape[:-2])).broadcast_axes(
                axis=len(jac.shape[:-2]), size=jac.shape[-2]
            )
            jac = jac * jac_t
    return jac
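
# Note on the loop above: for an MLP built from linear maps W_k and an
# elementwise activation a, the chain rule gives (biases drop out of the
# Jacobian)
#
#     J_f(x) = W_L @ diag(a'(h_{L-1})) @ W_{L-1} @ ... @ diag(a'(h_1)) @ W_1,
#
# where h_k is the input to the k-th activation. Iterating over the layers in
# reverse order, the function seeds the Jacobian with the last weight matrix
# (the i == 0 branch), applies gemm2 for each SNDense layer, and multiplies
# elementwise by the broadcast activation derivative for each activation
# layer (the diag terms).
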
class SNMLPBlock(mx.gluon.HybridBlock):
    @validated()
    def __init__(
        self,
        in_units: int,
        hidden_units: int,
        out_units: int,
        num_hidden_layers: int = 2,
        activation: str = "lipswish",
        jacobian_method: str = "bf",
        num_power_iter: int = 1,
        coeff: float = 0.9,
        flatten: bool = False,
    ):
        super().__init__()
        self._in_units = in_units
        self._hidden_units = hidden_units
        self._out_units = out_units
        self._num_hidden_layers = num_hidden_layers
        self._activation = activation
        self._jacobian_method = jacobian_method
        self._num_power_iter = num_power_iter
        self._coeff = coeff
        self._weight_initializer = mx.init.Orthogonal(scale=self._coeff)
        self._bias_initializer = "zeros"
        self._flatten = flatten
        self._cached_inputs: List[Tensor] = []

        in_dim = self._in_units
        with self.name_scope():
            self._layers: List[mx.gluon.HybridBlock] = []
            for i in range(self._num_hidden_layers):
                lin = SNDense(
                    self._hidden_units,
                    in_units=in_dim,
                    activation=None,
                    num_power_iter=self._num_power_iter,
                    weight_initializer=self._weight_initializer,
                    bias_initializer=self._bias_initializer,
                    coeff=self._coeff,
                    flatten=self._flatten,
                )
                act = get_activation(
                    self._activation, prefix=self._activation + str(i)
                )
                in_dim = self._hidden_units
                self.register_child(lin)
                self.register_child(act)
                self._layers += [lin, act]

            last_lin = SNDense(
                self._out_units,
                in_units=in_dim,
                activation=None,
                num_power_iter=self._num_power_iter,
                weight_initializer=self._weight_initializer,
                bias_initializer=self._bias_initializer,
                coeff=self._coeff,
                flatten=self._flatten,
            )
            self.register_child(last_lin)
            self._layers += [last_lin]
    def get_weights(self):
        return [
            layer.weight
            for layer in self._layers
            if isinstance(layer, SNDense)
        ]
    # noinspection PyMethodOverriding
    def hybrid_forward(self, F, x: Tensor) -> Tensor:
        """
        Parameters
        ----------
        x
            Input Tensor

        Returns
        -------
        Tensor
            Output of the SNMLPBlock.
        """
        # Cache the input of every layer so that jacobian() can reuse the
        # intermediate values of this forward pass.
        self._cached_inputs = []
        for layer in self._layers:
            self._cached_inputs += [x]
            x = layer(x)
        return x
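    # Caveat (an observation, not from the original source): jacobian() below
    # expects self._cached_inputs to have been populated by the most recent
    # forward call with the same tensor object, and checks this with an `is`
    # identity comparison. That presumes imperative (non-hybridized)
    # execution, where the cached entries are concrete NDArrays.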
    def jacobian(self, x: Tensor) -> Tensor:
        """
        Parameters
        ----------
        x
            Input Tensor

        Returns
        -------
        Tensor
            Jacobian of the SNMLPBlock evaluated at x.
        """
        if self._jacobian_method == "ignore":
            return x * 0
        elif self._jacobian_method == "bf":
            if (
                len(self._cached_inputs) == 0
                or self._cached_inputs[0] is not x
            ):
                warnings.warn(
                    "No cached forward pass for this input; recomputing "
                    "the forward pass for the jacobian term..."
                )
                self(x)
            return jacobian_sn_mlp_block_bf(
                list(zip(self._layers, self._cached_inputs))
            )
        raise NotImplementedError(self._jacobian_method)
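
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the original module): the
# hyperparameters and shapes below are assumptions chosen for demonstration.
if __name__ == "__main__":
    block = SNMLPBlock(in_units=3, hidden_units=8, out_units=2)
    block.initialize()

    x = mx.nd.random.normal(shape=(4, 3))
    y = block(x)  # forward pass; output shape (4, 2), caches layer inputs
    jac = block.jacobian(x)  # brute-force Jacobian, shape (4, 2, 3)
    print(y.shape, jac.shape)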