@@ -24,7 +24,9 @@ __all__ = [
 import copy
 import collections
 
+import numpy as np
+
 import paddle
 from .common import Linear, Dropout
 from .norm import LayerNorm
 from .. import functional as F
@@ -1174,3 +1176,39 @@ class Transformer(Layer):
         output = self.decoder(
             tgt, memory, tgt_mask=tgt_mask, memory_mask=memory_mask)
         return output
+
+    def generate_square_subsequent_mask(self, length):
+        """
+        Generate a square mask for the sequence. The mask ensures that the
+        predictions for position i can depend only on the known outputs at
+        positions less than i.
+
+        Parameters:
+            length (int|Tensor): The length of the sequence.
+
+        Returns:
+            Tensor: Generated square mask according to the given length.
+
+        Examples:
+            .. code-block:: python
+
+                import paddle
+                from paddle.nn.layer.transformer import Transformer
+                length = 5
+                d_model, n_head, dim_feedforward = 8, 4, 64
+                transformer_paddle = Transformer(
+                    d_model, n_head, dim_feedforward=dim_feedforward)
+                mask = transformer_paddle.generate_square_subsequent_mask(length)
+                print(mask.numpy())
+
+                # [[  0. -inf -inf -inf -inf]
+                #  [  0.   0. -inf -inf -inf]
+                #  [  0.   0.   0. -inf -inf]
+                #  [  0.   0.   0.   0. -inf]
+                #  [  0.   0.   0.   0.   0.]]
+
+        """
+        return paddle.tensor.triu(
+            (paddle.ones(
+                (length, length), dtype=paddle.get_default_dtype()) * -np.inf),
+            1)
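
A note on the construction: paddle.tensor.triu(x, 1) keeps only the entries
strictly above the diagonal and zeroes the diagonal and everything below it,
so the -inf values survive exactly where attention must be blocked. For
reference, the same mask can be sketched in plain NumPy (a minimal sketch of
the same idea, not part of the patch):

    import numpy as np

    length = 5
    # Keep -inf strictly above the diagonal; zero the diagonal and below.
    mask = np.triu(np.full((length, length), -np.inf), k=1)
    print(mask)  # matches the docstring output above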
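
As a usage sketch (my own illustration, mirroring the docstring example rather
than anything shown in the patch): the generated mask is passed to forward as
tgt_mask, where its -inf entries are added to the attention logits and vanish
after softmax, so each target position attends only to earlier positions.
Shapes below follow paddle.nn.Transformer's [batch_size, sequence_length,
d_model] convention.

    import paddle
    from paddle.nn.layer.transformer import Transformer

    batch_size, src_len, tgt_len = 2, 6, 5
    d_model, n_head, dim_feedforward = 8, 4, 64
    model = Transformer(d_model, n_head, dim_feedforward=dim_feedforward)

    src = paddle.rand((batch_size, src_len, d_model))
    tgt = paddle.rand((batch_size, tgt_len, d_model))

    # The [tgt_len, tgt_len] additive mask broadcasts over the batch and
    # head dimensions inside multi-head attention.
    tgt_mask = model.generate_square_subsequent_mask(tgt_len)

    output = model(src, tgt, tgt_mask=tgt_mask)
    print(output.shape)  # [2, 5, 8]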