|
|
|
@ -13,96 +13,53 @@
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
|
|
# TODO(luotao02): This config is only used for unit tests. It is out of date and will be updated later.
|
|
|
|
|
|
|
|
|
|
import math
|
|
|
|
|
from paddle.trainer_config_helpers import *
|
|
|
|
|
|
|
|
|
|
beam_search = get_config_arg('beam_search', bool, False)
|
|
|
|
|
|
|
|
|
|
model_type("recurrent_nn")
|
|
|
|
|
|
|
|
|
|
Settings(learning_rate=0, batch_size=15, algorithm='sgd')
|
|
|
|
|
|
|
|
|
|
Inputs("sent_id", "dummy_data_input")
|
|
|
|
|
Outputs("predict_word")
|
|
|
|
|
settings(batch_size=15, learning_rate=0)
|
|
|
|
|
|
|
|
|
|
num_words = 5
|
|
|
|
|
beam_flag = get_config_arg('beam_search', bool, False)
|
|
|
|
|
|
|
|
|
|
DataLayer(name="sent_id", size=1, )
|
|
|
|
|
sent_id = data_layer(name="sent_id", size=1)
|
|
|
|
|
|
|
|
|
|
# This layer takes no part in the computation; it is only used to decide batch_size in generation.
|
|
|
|
|
# When generating, at least one Memory in RecurrentLayer MUST have a boot layer.
|
|
|
|
|
DataLayer(name="dummy_data_input", size=2, )
|
|
|
|
|
|
|
|
|
|
if beam_search:
|
|
|
|
|
RecurrentLayerGroupBegin("decoding_layer_group",
|
|
|
|
|
in_links=[],
|
|
|
|
|
out_links=["predict_word"],
|
|
|
|
|
generator=Generator(max_num_frames=10,
|
|
|
|
|
beam_size=2,
|
|
|
|
|
num_results_per_sample=2, ))
|
|
|
|
|
else:
|
|
|
|
|
RecurrentLayerGroupBegin("decoding_layer_group",
|
|
|
|
|
in_links=[],
|
|
|
|
|
out_links=["predict_word"],
|
|
|
|
|
generator=Generator(max_num_frames=10, ))
|
|
|
|
|
dummy_memory = Memory(name="dummy_memory",
|
|
|
|
|
size=2,
|
|
|
|
|
boot_layer="dummy_data_input")
|
|
|
|
|
MixedLayer(name="dummy_memory",
|
|
|
|
|
size=2,
|
|
|
|
|
bias=False,
|
|
|
|
|
inputs=[IdentityProjection(dummy_memory)], )
|
|
|
|
|
state_memory = Memory(name="state",
|
|
|
|
|
size=num_words,
|
|
|
|
|
#boot_bias=True,
|
|
|
|
|
#boot_bias_active_type = "tanh",
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
predict_word_memory = Memory(name="predict_word",
|
|
|
|
|
size=num_words,
|
|
|
|
|
boot_with_const_id=0, )
|
|
|
|
|
|
|
|
|
|
MixedLayer(
|
|
|
|
|
name = "word_embedding",
|
|
|
|
|
size = num_words, # word embedding dim is the same as num_words in this test.
|
|
|
|
|
bias = False,
|
|
|
|
|
inputs = TableProjection(predict_word_memory,
|
|
|
|
|
initial_std=1,
|
|
|
|
|
learning_rate=0,
|
|
|
|
|
parameter_name="wordvec"))
|
|
|
|
|
|
|
|
|
|
Layer( # simplified RNN for testing
|
|
|
|
|
name="state",
|
|
|
|
|
type="mixed",
|
|
|
|
|
size=num_words,
|
|
|
|
|
bias=False,
|
|
|
|
|
inputs=[FullMatrixProjection("word_embedding",
|
|
|
|
|
parameter_name="transtable")])
|
|
|
|
|
|
|
|
|
|
Layer(name="output",
|
|
|
|
|
type="mixed",
|
|
|
|
|
size=num_words,
|
|
|
|
|
active_type="exponential",
|
|
|
|
|
bias=False,
|
|
|
|
|
inputs=TransposedFullMatrixProjection("state",
|
|
|
|
|
initial_std=1,
|
|
|
|
|
learning_rate=0,
|
|
|
|
|
parameter_name="wordvec"), )
|
|
|
|
|
|
|
|
|
|
Layer(name="predict_word", type="maxid", inputs=["output"], )
|
|
|
|
|
|
|
|
|
|
Layer(name="eos_check",
|
|
|
|
|
type="eos_id",
|
|
|
|
|
eos_id=num_words - 1,
|
|
|
|
|
inputs=["predict_word"], )
|
|
|
|
|
RecurrentLayerGroupEnd("decoding_layer_group")
|
|
|
|
|
|
|
|
|
|
Evaluator(name="answer_printer",
|
|
|
|
|
type="seq_text_printer",
|
|
|
|
|
dict_file="./trainer/tests/test_gen_dict.txt",
|
|
|
|
|
result_file="./trainer/tests/dump_text.test",
|
|
|
|
|
inputs=[
|
|
|
|
|
"sent_id",
|
|
|
|
|
"predict_word",
|
|
|
|
|
], )
|
|
|
|
|
# Data layer named "dummy_data_input" (size 2); it is passed to the generator
# below as a StaticInput.
dummy_data = data_layer(name="dummy_data_input", size=2)

# Inputs for the generation step function. The generated-word input reuses the
# "wordvec" embedding; the embedding size equals num_words in this test.
gen_inputs = [
    StaticInput(input=dummy_data, size=2),
    GeneratedInput(
        size=num_words,
        embedding_name="wordvec",
        embedding_size=num_words),
]
|
|
|
|
|
|
|
|
|
|
def step(dummy_memory, predict_word):
    """Build the layers for a single generation step.

    ``predict_word`` is fed through a full-matrix projection that uses the
    "transtable" parameter; the result is then fed through a transposed
    full-matrix projection that uses the "wordvec" parameter, with an
    exponential activation. Both mixed layers have size ``num_words``.

    ``dummy_memory`` is accepted but is not referenced in the body.

    Returns the second (exponentially activated) mixed layer.
    """
    # simplified RNN for testing
    with mixed_layer(size=num_words) as hidden:
        hidden += full_matrix_projection(
            input=predict_word, param_attr=ParamAttr(name="transtable"))

    with mixed_layer(size=num_words, act=ExpActivation()) as scores:
        scores += trans_full_matrix_projection(
            input=hidden, param_attr=ParamAttr(name="wordvec"))

    return scores
|
|
|
|
|
|
|
|
|
|
# Generate with `step` over `gen_inputs`. Word id 0 is the begin-of-sequence
# id and the last word id (num_words - 1) is the end-of-sequence id. When
# beam_flag is set, the beam size and the number of results per sample are
# both 2; otherwise both are 1.
beam_gen = beam_search(
    name="rnn_gen",
    step=step,
    input=gen_inputs,
    id_input=sent_id,
    dict_file="./trainer/tests/test_gen_dict.txt",
    result_file="./trainer/tests/dump_text.test",
    bos_id=0,
    eos_id=num_words - 1,
    beam_size=2 if beam_flag else 1,
    num_results_per_sample=2 if beam_flag else 1,
    max_length=10)
|
|
|
|
|
|
|
|
|
|
#outputs(beam_gen)
|
|
|
|
|
# In this config, dummy_data_input has no effect on beam_gen: dummy_memory is a
# read-only memory and is not used by any other layer in step. The Inputs and
# Outputs are therefore declared explicitly below. Note that
# "__beam_search_predict__" is the default output name of beam_search.
Inputs("sent_id", "dummy_data_input")
Outputs("__beam_search_predict__")
|
|
|
|
|