Source code for gluonts.mx.model.transformer.trans_encoder

# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

from typing import Dict

from mxnet.gluon import HybridBlock

from gluonts.core.component import validated
from gluonts.mx import Tensor

from .layers import (
    InputLayer,
    MultiHeadSelfAttention,
    TransformerFeedForward,
    TransformerProcessBlock,
)


class TransformerEncoder(HybridBlock):
    @validated()
    def __init__(self, encoder_length: int, config: Dict, **kwargs) -> None:
        super().__init__(**kwargs)

        self.encoder_length = encoder_length

        with self.name_scope():
            # project the input features to the model dimension
            self.enc_input_layer = InputLayer(model_size=config["model_dim"])

            # self-attention sub-layer with pre/post process blocks
            self.enc_pre_self_att = TransformerProcessBlock(
                sequence=config["pre_seq"],
                dropout=config["dropout_rate"],
                prefix="pretransformerprocessblock_",
            )
            self.enc_self_att = MultiHeadSelfAttention(
                att_dim_in=config["model_dim"],
                heads=config["num_heads"],
                att_dim_out=config["model_dim"],
                dropout=config["dropout_rate"],
                prefix="multiheadselfattention_",
            )
            self.enc_post_self_att = TransformerProcessBlock(
                sequence=config["post_seq"],
                dropout=config["dropout_rate"],
                prefix="postselfatttransformerprocessblock_",
            )

            # position-wise feed-forward sub-layer with post process block
            self.enc_ff = TransformerFeedForward(
                inner_dim=config["model_dim"] * config["inner_ff_dim_scale"],
                out_dim=config["model_dim"],
                act_type=config["act_type"],
                dropout=config["dropout_rate"],
                prefix="transformerfeedforward_",
            )
            self.enc_post_ff = TransformerProcessBlock(
                sequence=config["post_seq"],
                dropout=config["dropout_rate"],
                prefix="postfftransformerprocessblock_",
            )

    def hybrid_forward(self, F, data: Tensor) -> Tensor:
        """
        A transformer encoder block consists of a self-attention and a
        feed-forward layer with pre/post process blocks in between.
        """

        # input layer
        inputs = self.enc_input_layer(data)

        # self-attention
        data_self_att, _ = self.enc_self_att(
            self.enc_pre_self_att(inputs, None)
        )
        data = self.enc_post_self_att(data_self_att, inputs)

        # feed-forward
        data_ff = self.enc_ff(data)
        data = self.enc_post_ff(data_ff, data)

        return data
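The sketch below is not part of the module; it only illustrates how the encoder above could be instantiated and run imperatively. The config keys mirror the ones read in __init__, but every concrete value (model_dim=32, num_heads=8, the encoder length, and the input feature size) is an assumption chosen for demonstration.

import mxnet as mx

# hypothetical configuration; all values here are illustrative assumptions
config = {
    "model_dim": 32,
    "pre_seq": "dn",
    "post_seq": "drn",
    "dropout_rate": 0.1,
    "num_heads": 8,
    "inner_ff_dim_scale": 4,
    "act_type": "softrelu",
}

encoder = TransformerEncoder(encoder_length=24, config=config)
encoder.collect_params().initialize()

# random input of shape (batch, encoder_length, num_features);
# the InputLayer projects the last axis to model_dim before self-attention
data = mx.nd.random.normal(shape=(4, 24, 5))
output = encoder(data)  # expected shape: (4, 24, config["model_dim"])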