@@ -4,8 +4,9 @@ import numpy as np
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers

+from .desc import *
 # Set seed for CE
+from .config import ModelHyperParams,TrainTaskConfig
 dropout_seed = None

@@ ... @@
 def wrap_layer_with_block(layer, block_idx):
     """
@@ -269,23 +270,24 @@ pre_process_layer = partial(pre_post_process_layer, None)
 post_process_layer = pre_post_process_layer


-def prepare_encoder(src_word,#[b,t,c]
-                    src_pos,
-                    src_vocab_size,
-                    src_emb_dim,
-                    src_max_len,
-                    dropout_rate=0.,
-                    bos_idx=0,
-                    word_emb_param_name=None,
-                    pos_enc_param_name=None):
+def prepare_encoder(
+        src_word, #[b,t,c]
+        src_pos,
+        src_vocab_size,
+        src_emb_dim,
+        src_max_len,
+        dropout_rate=0.,
+        bos_idx=0,
+        word_emb_param_name=None,
+        pos_enc_param_name=None):
     """Add word embeddings and position encodings.
     The output tensor has a shape of:
     [batch_size, max_src_length_in_batch, d_model].
     This module is used at the bottom of the encoder stacks.
     """
-    src_word_emb =src_word#layers.concat(res,axis=1)
-    src_word_emb=layers.cast(src_word_emb,'float32')
+    src_word_emb = src_word #layers.concat(res,axis=1)
+    src_word_emb = layers.cast(src_word_emb, 'float32')
     # print("src_word_emb",src_word_emb)

     src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5)
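Note on the reshaped prepare_encoder: unlike the stock transformer helper it no longer performs an embedding lookup; src_word already carries float visual features of shape [b, t, c], which are cast to float32, scaled by src_emb_dim**0.5 and then, as in the original helper, combined with the position-encoding table before dropout. A minimal calling sketch; conv_features and every dimension below are placeholders, not part of this patch:

    # illustrative only; names and values here are assumptions
    enc_input = prepare_encoder(
        conv_features,      # hypothetical [b, t, c] feature map from the backbone
        src_pos,            # position ids for the t time steps
        src_vocab_size=37,  # placeholder; no lookup is performed on it here
        src_emb_dim=512,    # must match c so the position table can be added
        src_max_len=64,     # matches the hard-coded max length used below
        dropout_rate=0.1,
        word_emb_param_name="src_word_emb_table")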
@@ -302,14 +304,14 @@ def prepare_encoder(src_word,#[b,t,c]


 def prepare_decoder(src_word,
                     src_pos,
                     src_vocab_size,
                     src_emb_dim,
                     src_max_len,
                     dropout_rate=0.,
                     bos_idx=0,
                     word_emb_param_name=None,
                     pos_enc_param_name=None):
     """Add word embeddings and position encodings.
     The output tensor has a shape of:
     [batch_size, max_src_length_in_batch, d_model].
@@ -323,7 +325,7 @@ def prepare_decoder(src_word,
             name=word_emb_param_name,
             initializer=fluid.initializer.Normal(0., src_emb_dim**-0.5)))
     # print("target_word_emb",src_word_emb)
-    src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim ** 0.5)
+    src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5)
     src_pos_enc = layers.embedding(
         src_pos,
         size=[src_max_len, src_emb_dim],
@@ -335,6 +337,7 @@ def prepare_decoder(src_word,
         enc_input, dropout_prob=dropout_rate, seed=dropout_seed,
         is_test=False) if dropout_rate else enc_input


 # prepare_encoder = partial(
 #     prepare_encoder_decoder, pos_enc_param_name=pos_enc_param_names[0])
 # prepare_decoder = partial(
@@ -595,21 +598,9 @@ def transformer(src_vocab_size,
     weights = all_inputs[-1]

     enc_output = wrap_encoder(
-        src_vocab_size,
-        ModelHyperParams.src_seq_len,
-        n_layer,
-        n_head,
-        d_key,
-        d_value,
-        d_model,
-        d_inner_hid,
-        prepostprocess_dropout,
-        attention_dropout,
-        relu_dropout,
-        preprocess_cmd,
-        postprocess_cmd,
-        weight_sharing,
-        enc_inputs)
+        src_vocab_size, 64, n_layer, n_head, d_key, d_value, d_model,
+        d_inner_hid, prepostprocess_dropout, attention_dropout, relu_dropout,
+        preprocess_cmd, postprocess_cmd, weight_sharing, enc_inputs)

     predict = wrap_decoder(
         trg_vocab_size,
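Both here and in fast_decode below, the max_length argument of wrap_encoder changes from ModelHyperParams.src_seq_len to a hard-coded 64 (the "to do" comment in the last hunk suggests this is provisional). If the value is meant to stay configurable, one option, offered only as a suggestion and not part of the patch, is to fall back to the config when it defines the attribute:

    # suggestion only; assumes ModelHyperParams may or may not define src_seq_len
    max_src_len = getattr(ModelHyperParams, "src_seq_len", 64)
    enc_output = wrap_encoder(
        src_vocab_size, max_src_len, n_layer, n_head, d_key, d_value, d_model,
        d_inner_hid, prepostprocess_dropout, attention_dropout, relu_dropout,
        preprocess_cmd, postprocess_cmd, weight_sharing, enc_inputs)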
@@ -650,21 +641,21 @@ def transformer(src_vocab_size,


 def wrap_encoder_forFeature(src_vocab_size,
                             max_length,
                             n_layer,
                             n_head,
                             d_key,
                             d_value,
                             d_model,
                             d_inner_hid,
                             prepostprocess_dropout,
                             attention_dropout,
                             relu_dropout,
                             preprocess_cmd,
                             postprocess_cmd,
                             weight_sharing,
                             enc_inputs=None,
                             bos_idx=0):
     """
     The wrapper assembles together all needed layers for the encoder.
     img, src_pos, src_slf_attn_bias = enc_inputs
@@ -676,8 +667,8 @@ def wrap_encoder_forFeature(src_vocab_size,
         conv_features, src_pos, src_slf_attn_bias = make_all_inputs(
             encoder_data_input_fields)
     else:
-        conv_features, src_pos, src_slf_attn_bias = enc_inputs#
-        b,t,c = conv_features.shape
+        conv_features, src_pos, src_slf_attn_bias = enc_inputs #
+        b, t, c = conv_features.shape
     #"""
     # insert cnn
     #"""
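For reference, wrap_encoder_forFeature expects enc_inputs to be the triple (conv_features, src_pos, src_slf_attn_bias) unpacked above. A rough calling sketch; the shapes in the comments are assumptions rather than values taken from this patch:

    # assumed shapes:
    #   conv_features      [b, t, c]           float features from the visual backbone
    #   src_pos            [b, t, 1]           int64 position ids
    #   src_slf_attn_bias  [b, n_head, t, t]   additive attention mask (0 or large negative)
    enc_output = wrap_encoder_forFeature(
        src_vocab_size, 64, n_layer, n_head, d_key, d_value, d_model,
        d_inner_hid, prepostprocess_dropout, attention_dropout, relu_dropout,
        preprocess_cmd, postprocess_cmd, weight_sharing,
        enc_inputs=(conv_features, src_pos, src_slf_attn_bias))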
@@ -718,7 +709,7 @@ def wrap_encoder_forFeature(src_vocab_size,
         max_length,
         prepostprocess_dropout,
         bos_idx=bos_idx,
-        word_emb_param_name=word_emb_param_names[0])
+        word_emb_param_name="src_word_emb_table")

     enc_output = encoder(
         enc_input,
@@ -736,6 +727,7 @@ def wrap_encoder_forFeature(src_vocab_size,
         postprocess_cmd, )
     return enc_output


 def wrap_encoder(src_vocab_size,
                  max_length,
                  n_layer,
@@ -762,7 +754,7 @@ def wrap_encoder(src_vocab_size,
         src_word, src_pos, src_slf_attn_bias = make_all_inputs(
             encoder_data_input_fields)
     else:
-        src_word, src_pos, src_slf_attn_bias = enc_inputs#
+        src_word, src_pos, src_slf_attn_bias = enc_inputs #
     #"""
     # insert cnn
     #"""
@@ -802,7 +794,7 @@ def wrap_encoder(src_vocab_size,
         max_length,
         prepostprocess_dropout,
         bos_idx=bos_idx,
-        word_emb_param_name=word_emb_param_names[0])
+        word_emb_param_name="src_word_emb_table")

     enc_output = encoder(
         enc_input,
@@ -858,8 +850,8 @@ def wrap_decoder(trg_vocab_size,
         max_length,
         prepostprocess_dropout,
         bos_idx=bos_idx,
-        word_emb_param_name=word_emb_param_names[0]
-        if weight_sharing else word_emb_param_names[1])
+        word_emb_param_name="src_word_emb_table"
+        if weight_sharing else "trg_word_emb_table")
     dec_output = decoder(
         dec_input,
         enc_output,
@@ -886,7 +878,7 @@ def wrap_decoder(trg_vocab_size,
         predict = layers.matmul(
             x=dec_output,
             y=fluid.default_main_program().global_block().var(
-                word_emb_param_names[0]),
+                "trg_word_emb_table"),
             transpose_y=True)
     else:
         predict = layers.fc(input=dec_output,
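These two hunks drop the word_emb_param_names tuple in favour of the literal parameter names "src_word_emb_table" and "trg_word_emb_table". One thing worth double-checking: under weight sharing the decoder embedding above is created as "src_word_emb_table", while the matmul here now reads "trg_word_emb_table"; if both are meant to refer to the same shared parameter, the names should probably agree. The tied output projection itself is just a matmul against the transposed embedding table; a small sketch of the idea (not verbatim from the file):

    def tied_output_projection(dec_output):
        # dec_output: [b, t, d_model]. The parameter created by layers.embedding(
        # ..., param_attr=fluid.ParamAttr(name="trg_word_emb_table")) has shape
        # [trg_vocab_size, d_model], so transpose_y yields vocab-sized logits.
        emb_table = fluid.default_main_program().global_block().var(
            "trg_word_emb_table")
        return layers.matmul(x=dec_output, y=emb_table, transpose_y=True)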
@@ -931,12 +923,13 @@ def fast_decode(src_vocab_size,
     enc_inputs_len = len(encoder_data_input_fields)
     dec_inputs_len = len(fast_decoder_data_input_fields)
-    enc_inputs = all_inputs[0:enc_inputs_len]#enc_inputs tensor
-    dec_inputs = all_inputs[enc_inputs_len:enc_inputs_len + dec_inputs_len]#dec_inputs tensor
+    enc_inputs = all_inputs[0:enc_inputs_len] #enc_inputs tensor
+    dec_inputs = all_inputs[enc_inputs_len:enc_inputs_len +
+                            dec_inputs_len] #dec_inputs tensor

     enc_output = wrap_encoder(
         src_vocab_size,
-        ModelHyperParams.src_seq_len,##to do !!!!!????
+        64, ##to do !!!!!????
         n_layer,
         n_head,
         d_key,
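The slicing at the top of fast_decode simply partitions the flat all_inputs list according to the two field tuples. A pure-Python illustration of the arithmetic; the decoder field names are abbreviated stand-ins, not the real tuple:

    encoder_fields = ("conv_features", "src_pos", "src_slf_attn_bias")
    decoder_fields = ("f0", "f1", "f2", "f3")   # stand-in for fast_decoder_data_input_fields
    all_inputs = ["e0", "e1", "e2", "d0", "d1", "d2", "d3"]

    enc_inputs_len = len(encoder_fields)                   # 3
    dec_inputs_len = len(decoder_fields)                   # 4
    enc_inputs = all_inputs[0:enc_inputs_len]              # ["e0", "e1", "e2"]
    dec_inputs = all_inputs[enc_inputs_len:enc_inputs_len +
                            dec_inputs_len]                # ["d0", "d1", "d2", "d3"]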