@@ -1,8 +1,4 @@
-import paddle.v2.activation as activation
-import paddle.v2.attr as attr
-import paddle.v2.data_type as data_type
-import paddle.v2.layer as layer
-import paddle.v2.networks as networks
+import paddle.v2 as paddle
 
 
 def seqToseq_net_v2(source_dict_dim, target_dict_dim):
@@ -12,64 +8,70 @@ def seqToseq_net_v2(source_dict_dim, target_dict_dim):
     encoder_size = 512  # dimension of hidden unit in GRU Encoder network
 
     #### Encoder
-    src_word_id = layer.data(
+    src_word_id = paddle.layer.data(
         name='source_language_word',
-        type=data_type.integer_value_sequence(source_dict_dim))
-    src_embedding = layer.embedding(
+        type=paddle.data_type.integer_value_sequence(source_dict_dim))
+    src_embedding = paddle.layer.embedding(
         input=src_word_id,
         size=word_vector_dim,
-        param_attr=attr.ParamAttr(name='_source_language_embedding'))
-    src_forward = networks.simple_gru(input=src_embedding, size=encoder_size)
-    src_backward = networks.simple_gru(
+        param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
+    src_forward = paddle.networks.simple_gru(
+        input=src_embedding, size=encoder_size)
+    src_backward = paddle.networks.simple_gru(
         input=src_embedding, size=encoder_size, reverse=True)
-    encoded_vector = layer.concat(input=[src_forward, src_backward])
+    encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
 
     #### Decoder
-    with layer.mixed(size=decoder_size) as encoded_proj:
-        encoded_proj += layer.full_matrix_projection(input=encoded_vector)
+    with paddle.layer.mixed(size=decoder_size) as encoded_proj:
+        encoded_proj += paddle.layer.full_matrix_projection(
+            input=encoded_vector)
 
-    backward_first = layer.first_seq(input=src_backward)
+    backward_first = paddle.layer.first_seq(input=src_backward)
 
-    with layer.mixed(size=decoder_size, act=activation.Tanh()) as decoder_boot:
-        decoder_boot += layer.full_matrix_projection(input=backward_first)
+    with paddle.layer.mixed(
+            size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
+        decoder_boot += paddle.layer.full_matrix_projection(
+            input=backward_first)
 
     def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
 
-        decoder_mem = layer.memory(
+        decoder_mem = paddle.layer.memory(
             name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
 
-        context = networks.simple_attention(
+        context = paddle.networks.simple_attention(
             encoded_sequence=enc_vec,
             encoded_proj=enc_proj,
             decoder_state=decoder_mem)
 
-        with layer.mixed(size=decoder_size * 3) as decoder_inputs:
-            decoder_inputs += layer.full_matrix_projection(input=context)
-            decoder_inputs += layer.full_matrix_projection(input=current_word)
+        with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
+            decoder_inputs += paddle.layer.full_matrix_projection(input=context)
+            decoder_inputs += paddle.layer.full_matrix_projection(
+                input=current_word)
 
-        gru_step = layer.gru_step(
+        gru_step = paddle.layer.gru_step(
             name='gru_decoder',
             input=decoder_inputs,
             output_mem=decoder_mem,
             size=decoder_size)
 
-        with layer.mixed(
-                size=target_dict_dim, bias_attr=True,
-                act=activation.Softmax()) as out:
-            out += layer.full_matrix_projection(input=gru_step)
+        with paddle.layer.mixed(
+                size=target_dict_dim,
+                bias_attr=True,
+                act=paddle.activation.Softmax()) as out:
+            out += paddle.layer.full_matrix_projection(input=gru_step)
         return out
 
     decoder_group_name = "decoder_group"
-    group_input1 = layer.StaticInputV2(input=encoded_vector, is_seq=True)
-    group_input2 = layer.StaticInputV2(input=encoded_proj, is_seq=True)
+    group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
+    group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
     group_inputs = [group_input1, group_input2]
 
-    trg_embedding = layer.embedding(
-        input=layer.data(
+    trg_embedding = paddle.layer.embedding(
+        input=paddle.layer.data(
             name='target_language_word',
-            type=data_type.integer_value_sequence(target_dict_dim)),
+            type=paddle.data_type.integer_value_sequence(target_dict_dim)),
         size=word_vector_dim,
-        param_attr=attr.ParamAttr(name='_target_language_embedding'))
+        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
     group_inputs.append(trg_embedding)
 
     # For decoder equipped with attention mechanism, in training,
@@ -77,14 +79,14 @@ def seqToseq_net_v2(source_dict_dim, target_dict_dim):
     # while encoded source sequence is accessed to as an unbounded memory.
     # Here, the StaticInput defines a read-only memory
     # for the recurrent_group.
-    decoder = layer.recurrent_group(
+    decoder = paddle.layer.recurrent_group(
         name=decoder_group_name,
         step=gru_decoder_with_attention,
         input=group_inputs)
 
-    lbl = layer.data(
+    lbl = paddle.layer.data(
         name='target_language_next_word',
-        type=data_type.integer_value_sequence(target_dict_dim))
-    cost = layer.classification_cost(input=decoder, label=lbl)
+        type=paddle.data_type.integer_value_sequence(target_dict_dim))
+    cost = paddle.layer.classification_cost(input=decoder, label=lbl)
 
     return cost
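
Note: the sketch below is not part of the patch above. It is a minimal usage example of the refactored function under the consolidated `import paddle.v2 as paddle` style, assuming the PaddlePaddle v2 trainer API (`paddle.init`, `paddle.parameters.create`); the dictionary sizes are hypothetical values for illustration only.

    import paddle.v2 as paddle

    # initialize PaddlePaddle once per process (CPU, single trainer)
    paddle.init(use_gpu=False, trainer_count=1)

    # hypothetical vocabulary sizes, chosen only for this example
    source_dict_dim = 30000
    target_dict_dim = 30000

    # build the training cost from the refactored network definition
    cost = seqToseq_net_v2(source_dict_dim, target_dict_dim)

    # create the trainable parameters described by the topology
    parameters = paddle.parameters.create(cost)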