Merge pull request #1761 from jacquesqiao/beam_search

support Beam search in v2 api
release/0.10.0
jacquesqiao 8 years ago committed by GitHub
commit b22cd96a3b

@ -1,13 +1,17 @@
import sys
import paddle.v2 as paddle
def seqToseq_net(source_dict_dim, target_dict_dim):
def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
beam_size = 3
max_length = 250
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
@ -67,6 +71,7 @@ def seqToseq_net(source_dict_dim, target_dict_dim):
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
if not is_generating:
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
@ -91,6 +96,32 @@ def seqToseq_net(source_dict_dim, target_dict_dim):
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
else:
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# Embedding of the last generated word is automatically gotten by
# GeneratedInputs, which is initialized by a start mark, such as <s>,
# and must be included in generation.
trg_embedding = paddle.layer.GeneratedInputV2(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
beam_gen = paddle.layer.beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length)
return beam_gen
def main():

@ -67,7 +67,16 @@ class Layer(object):
self.name = name
self.__context__ = {}
self.__parent_layers__ = parent_layers
self.__children_layers__ = [] # used for evaluator.
# some layer may have some extra parent layer
self.__extra_parent__ = []
# used for evaluator.
self.__children_layers__ = []
def extra_parent(self):
return self.__extra_parent__
def append_extra_parent(self, parent):
self.__extra_parent__.append(parent)
def append_child(self, layer, parent_names):
self.__children_layers__.append((layer, parent_names))
@ -78,14 +87,20 @@ class Layer(object):
"""
self.__context__ = context
# short cut if myself is parsed before.
# STEP: short cut if this layer is parsed before.
if self.context_name() in context:
if self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
# parse parent before myself
# STEP: parse extra_parent that is not used by this layer but must
# be parsed before this layer.
for p in self.__extra_parent__:
p.to_proto(context=context)
# STEP: parse parent that is used by this layer, get the result and
# insert into kwargs of the next layer's to_proto_impl method.
kwargs = dict()
for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name],
@ -97,14 +112,13 @@ class Layer(object):
self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer
# parse myself.
# STEP: parse myself and add myself into context.
ret_val = self.to_proto_impl(**kwargs)
if self.context_name() is not None and \
self.context_name() not in context:
if self.context_name() is not None \
and self.context_name() not in context:
context[self.context_name()] = ret_val
# parse children.
# STEP: parse children that should be pased after this layer.
for layer, pnames in self.__children_layers__:
drop = False
@ -117,6 +131,7 @@ class Layer(object):
continue
layer.to_proto(context=context)
# STEP: return v1 layer result
if self.context_name() is None:
return ret_val
elif self.use_context_name():

File diff suppressed because it is too large Load Diff

@ -17,7 +17,6 @@ import collections
from paddle.proto.ModelConfig_pb2 import ModelConfig
import layer as v2_layer
from layer import WithExtraParent
__all__ = ['Topology']
@ -41,9 +40,8 @@ def __bfs_travel__(callback, *layers):
__break__ = callback(each_layer)
if __break__:
return
__layers__ = each_layer.__parent_layers__.values()
if isinstance(each_layer, WithExtraParent):
__layers__ = __layers__ + each_layer.extra_parent()
__layers__ = each_layer.__parent_layers__.values() + \
each_layer.extra_parent()
__bfs_travel__(callback, *__layers__)

Loading…
Cancel
Save