beam search api and unittest in hierarchical rnn (#122)

9 years ago · 4615c5172c
parent baaaa0b09d
commit 4615c5172c
8 changed files with 184 additions and 74 deletions
--- a/demo/seqToseq/seqToseq_net.py
+++ b/demo/seqToseq/seqToseq_net.py
@ -171,12 +171,13 @@ def gru_encoder_decoder(data_conf,
        beam_gen = beam_search(name=decoder_group_name,
                               step=gru_decoder_with_attention,
                               input=group_inputs,
-                               id_input=data_layer(name="sent_id",
-                                                   size=1),
-                               dict_file=trg_dict_path,
                               bos_id=0,
                               eos_id=1,
                               beam_size=beam_size,
-                               max_length=max_length,
+                               max_length=max_length)
+
+        seqtext_printer_evaluator(input=beam_gen,
+                                  id_input=data_layer(name="sent_id", size=1),
+                                  dict_file=trg_dict_path,
                                  result_file=gen_trans_file)
        outputs(beam_gen)
--- a/doc/algorithm/rnn/rnn.rst
+++ b/doc/algorithm/rnn/rnn.rst
@ -202,12 +202,15 @@ After training the model, we can use it to generate sequences. A common practice
 * use :code:`GeneratedInput` for trg_embedding. :code:`GeneratedInput` computes the embedding of the generated token at the last time step for the input at the current time step.
 * use :code:`beam_search` function. This function needs to set:

-  - :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
-  - :code:`dict_file`: the dictionary file for converting word id to word.
  - :code:`bos_id`: the start token. Every sentence starts with the start token.
  - :code:`eos_id`: the end token. Every sentence ends with the end token.
  - :code:`beam_size`: the beam size used in beam search.
  - :code:`max_length`: the maximum length of the generated sentences.
+
+* use :code:`seqtext_printer_evaluator` to print text according to index matrix and dictionary. This function needs to set:
+
+  - :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
+  - :code:`dict_file`: the dictionary file for converting word id to word.
  - :code:`result_file`: the path of the generation result file.
    
 The code is listed below:
@ -230,13 +233,14 @@ The code is listed below:
    beam_gen = beam_search(name=decoder_group_name,
                           step=gru_decoder_with_attention,
                           input=group_inputs,
-                           id_input=data_layer(name="sent_id",
-                                               size=1),
-                           dict_file=trg_dict_path,
                           bos_id=0, # Beginning token.
                           eos_id=1, # End of sentence token.
                           beam_size=beam_size,
-                           max_length=max_length,
+                           max_length=max_length)
+
+    seqtext_printer_evaluator(input=beam_gen,
+                              id_input=data_layer(name="sent_id", size=1),
+                              dict_file=trg_dict_path,
                              result_file=gen_trans_file)
    outputs(beam_gen)

--- a/paddle/trainer/tests/rnn_gen_test_model_dir/r1.test.nest
+++ b/paddle/trainer/tests/rnn_gen_test_model_dir/r1.test.nest
@ -0,0 +1,16 @@
+0	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+	 1 2 3 4
+
--- a/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf
+++ b/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf
@ -0,0 +1,73 @@
+#edit-mode: -*- python -*-
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from paddle.trainer_config_helpers import *
+
+settings(batch_size=15, learning_rate=0)
+
+num_words = 5
+beam_flag = get_config_arg('beam_search', bool, False)
+
+sent_id = data_layer(name="sent_id", size=1)
+
+# This layer has no actual use, but only to decide batch_size in generation.
+# When generating, at least one Memory in RecurrentLayer MUST have a boot layer.
+dummy_data = data_layer(name="dummy_data_input", size=2)
+
+def outer_step(dummy_data):
+
+    gen_inputs = [StaticInput(input=dummy_data, size=2, is_seq=True),
+                  GeneratedInput(size=num_words,
+                                 embedding_name="wordvec",
+                                 embedding_size=num_words)]
+
+    def inner_step(dummy_memory, predict_word):
+        
+        # simplified RNN for testing
+        with mixed_layer(size=num_words) as layer:
+            layer += full_matrix_projection(input=predict_word,
+                                            param_attr=ParamAttr(name="transtable"))
+
+        with mixed_layer(size=num_words, act=ExpActivation()) as out:
+            out += trans_full_matrix_projection(input=layer,
+                                                param_attr=ParamAttr(name="wordvec"))
+
+        return out
+    
+    beam_gen = beam_search(name="rnn_gen",
+                           step=inner_step,
+                           input=gen_inputs,
+                           bos_id=0,
+                           eos_id=num_words-1,
+                           beam_size=2 if beam_flag else 1,
+                           num_results_per_sample=2 if beam_flag else 1,
+                           max_length=10) 
+    return beam_gen
+
+beam_gen_concat = recurrent_group(name="rnn_gen_concat",
+                                  step=outer_step,
+                                  input=[SubsequenceInput(dummy_data)])
+
+seqtext_printer_evaluator(input=beam_gen_concat,
+                          id_input=sent_id,
+                          dict_file="./trainer/tests/test_gen_dict.txt",
+                          result_file="./trainer/tests/dump_text.test")
+#outputs(beam_gen_concat)
+# In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory
+# is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs
+# as follows. Note that "__beam_search_predict__" is the default output name of beam_search.
+Inputs("sent_id","dummy_data_input")
+Outputs("__beam_search_predict__")
--- a/paddle/trainer/tests/sample_trainer_rnn_gen.conf
+++ b/paddle/trainer/tests/sample_trainer_rnn_gen.conf
@ -48,15 +48,16 @@ def step(dummy_memory, predict_word):
 beam_gen = beam_search(name="rnn_gen",
                       step=step,
                       input=gen_inputs,
-                       id_input=sent_id,
-                       dict_file="./trainer/tests/test_gen_dict.txt",
-                       result_file="./trainer/tests/dump_text.test",
                       bos_id=0,
                       eos_id=num_words-1,
                       beam_size=2 if beam_flag else 1,
                       num_results_per_sample=2 if beam_flag else 1,
                       max_length=10) 

+seqtext_printer_evaluator(input=beam_gen,
+                          id_input=sent_id,
+                          dict_file="./trainer/tests/test_gen_dict.txt",
+                          result_file="./trainer/tests/dump_text.test")
 #outputs(beam_gen)
 # In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory
 # is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs
--- a/paddle/trainer/tests/test_recurrent_machine_generation.cpp
+++ b/paddle/trainer/tests/test_recurrent_machine_generation.cpp
@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-
 #include <fstream>

 #include <paddle/utils/PythonUtil.h>
@ -24,6 +23,8 @@ using namespace paddle;  // NOLINT
 using namespace std;     // NOLINT

 static const string& CONFIG_FILE = "trainer/tests/sample_trainer_rnn_gen.conf";
+static const string& NEST_CONFIG_FILE =
+    "trainer/tests/sample_trainer_nest_rnn_gen.conf";
 static const string& OUTPUT_DIR = "trainer/tests/dump_text.test";
 static string modelDir = "trainer/tests/rnn_gen_test_model_dir/t1";  // NOLINT
 static string expectFile =                                           // NOLINT
@ -50,32 +51,52 @@ void checkOutput(const string& expRetFile) {
  }
 }

-void prepareInArgs(vector<Argument>& inArgs,
-                   const size_t batchSize, bool useGpu) {
+void prepareInArgs(vector<Argument>& inArgs, const size_t batchSize,
+                   bool useGpu, bool hasSubseq) {
  inArgs.clear();
  // sentence id
  Argument sentId;
  sentId.value = nullptr;
+  if (hasSubseq) {
+    // as there is only one sequence, there is only one label.
+    IVector::resizeOrCreate(sentId.ids, 1, useGpu);
+    sentId.ids->setElement(0, 0);
+  } else {
+    // as there are batchSize words, there are batchSize labels.
    IVector::resizeOrCreate(sentId.ids, batchSize, useGpu);
    for (size_t i = 0; i < batchSize; ++i) sentId.ids->setElement(i, i);
+  }
  inArgs.emplace_back(sentId);

  // a dummy layer to decide batch size
  Argument dummyInput;
  dummyInput.value = Matrix::create(batchSize, 2, false, useGpu);
  dummyInput.value->randomizeUniform();
+  if (hasSubseq) {
+    // generate one sequence with batchSize subsequences,
+    // and each subsequence has only one word.
+    dummyInput.sequenceStartPositions = ICpuGpuVector::create(2, false);
+    int* buf = dummyInput.sequenceStartPositions->getMutableData(false);
+    dummyInput.subSequenceStartPositions =
+        ICpuGpuVector::create(batchSize + 1, false);
+    int* subBuf = dummyInput.subSequenceStartPositions->getMutableData(false);
+    buf[0] = 0;
+    buf[1] = batchSize;
+    for (size_t i = 0; i < batchSize + 1; i++) subBuf[i] = i;
+  }
  inArgs.emplace_back(dummyInput);
 }

-void testGeneration(bool useGpu, const string& expRetFile) {
+void testGeneration(const string& configFile, bool useGpu, bool hasSubseq,
+                    const string& expRetFile) {
  FLAGS_use_gpu = useGpu;
-  auto config = std::make_shared<TrainerConfigHelper>(CONFIG_FILE);
+  auto config = std::make_shared<TrainerConfigHelper>(configFile);
  unique_ptr<GradientMachine> gradientMachine(GradientMachine::create(*config));
  gradientMachine->loadParameters(modelDir);
  vector<Argument> inArgs(2);

  const size_t batchSize = 15;
-  prepareInArgs(inArgs, batchSize, useGpu);
+  prepareInArgs(inArgs, batchSize, useGpu, hasSubseq);
  vector<Argument> outArgs;
  unique_ptr<Evaluator> testEvaluator(gradientMachine->makeEvaluator());
  testEvaluator->start();
@ -93,16 +114,21 @@ TEST(RecurrentGradientMachine, test_generation) {
 #else
  const auto useGpuConfs = {true, false};
 #endif
-  FLAGS_config_args = "beam_search=0";  // no beam search
-  string expectRetFileNoBeam = expectFile + ".nobeam";
-  for (auto useGpu : useGpuConfs) {
-    testGeneration(useGpu, expectRetFileNoBeam);
-  }
-  FLAGS_config_args = "beam_search=1";  // no beam search
-  string expectRetFileBeam = expectFile + ".beam";
+  auto testGen = [&](const string& configFile, bool hasSubseq,
+                     const string& expRetFile, bool beam_search) {
+    FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0";
    for (auto useGpu : useGpuConfs) {
-    testGeneration(useGpu, expectRetFileBeam);
+      testGeneration(configFile, useGpu, hasSubseq, expRetFile);
    }
+  };
+  testGen(CONFIG_FILE, false, expectFile + ".nobeam", false);  // no beam search
+  testGen(CONFIG_FILE, false, expectFile + ".beam", true);     // beam search
+  // In hierarchical RNN, beam search and one-way search only happen in the
+  // inner RNN; the outer RNN concatenates the inner results (only the first
+  // one for beam search). Thus, both modes produce the same outer results.
+  testGen(NEST_CONFIG_FILE, true, expectFile + ".nest",
+          false);  // no beam search
+  testGen(NEST_CONFIG_FILE, true, expectFile + ".nest", true);  // beam search
 }
 #endif

--- a/python/paddle/trainer_config_helpers/evaluators.py
+++ b/python/paddle/trainer_config_helpers/evaluators.py
@ -559,6 +559,7 @@ def maxframe_printer_evaluator(
 def seqtext_printer_evaluator(
        input,
        result_file,
+        id_input=None,
        dict_file=None,
        delimited=None,
        name=None,
@ -567,11 +568,10 @@ def seqtext_printer_evaluator(
    Sequence text printer will print text according to index matrix and a
    dictionary. There can be multiple inputs to this layer:

-    1. If there is only one input, the input must be a matrix containing
+    1. If there is no id_input, the input must be a matrix containing
    the sequence of indices;

-    2. If there are more than one input, the first input should be ids,
-    and are interpreted as sample ids.
+    2. If id_input is given, it should contain ids, interpreted as sample ids.

    The output format will be:

@ -602,26 +602,43 @@ def seqtext_printer_evaluator(

    .. code-block:: python

-       eval = seqtext_printer_evaluator(input,
+       eval = seqtext_printer_evaluator(input=maxid_layer,
+                                        id_input=sample_id,
                                        dict_file=dict_file,
                                        result_file=result_file)

    :param input: Input Layer name.
    :type input: LayerOutput|list
-    :param dict_file: The input dictionary which contains a list of tokens.
-    :type dict_file: basestring
-    :param result_file: The file is to save the results.
+    :param result_file: Path of the file to store the generated results.
    :type result_file: basestring
+    :param id_input: Index of the input sequence, and the specified index will
+                     be printed in the generated results. This is an optional
+                     parameter.
+    :type id_input: LayerOutput
+    :param dict_file: Path of dictionary. This is an optional parameter.
+                      Every line is a word in the dictionary with
+                      (line number - 1) as the word index.
+                      If this parameter is set to None, or to an empty string,
+                      only word indices are printed in the generated results.
+    :type dict_file: basestring
    :param delimited: Whether to use space to separate output tokens.
                Default is True. No space is added if set to False.
    :type delimited: bool
    :param name: Evaluator name.
    :type name: None|basestring
+    :return: The seq_text_printer that prints the generated sequence to a file.
+    :rtype: evaluator
    """
    assert isinstance(result_file, basestring)
+    if id_input is None:
+        inputs = [input]
+    else:
+        inputs = [id_input, input]
+        input.parents.append(id_input)
+
    evaluator_base(name=name,
                   type="seq_text_printer",
-                   input=input,
+                   input=inputs,
                   dict_file=dict_file,
                   result_file=result_file,
                   delimited=delimited)
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@ -2608,7 +2608,6 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):

@wrap_name_default()
 def beam_search(step, input, bos_id, eos_id, beam_size,
-                result_file, dict_file="", id_input=None,
                max_length=500, name=None,
                num_results_per_sample=None):
    """
@ -2632,8 +2631,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
                               input=[StaticInput(encoder_last)],
                               bos_id=0,
                               eos_id=1,
-                               beam_size=5,
-                               result_file="./generated_sequences.txt")
+                               beam_size=5)

    Please see the following demo for more details:

@ -2671,24 +2669,12 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
                      of the most promising next words. The greater the beam
                      size, the fewer candidate words are pruned.
    :type beam_size: int
-    :param result_file: Path of the file to store the generated results.
-    :type result_file: basestring
-    :param dict_file: Path of dictionary. This is an optional parameter.
-                      Every line is a word in the dictionary with
-                      (line number - 1) as the word index.
-                      If this parameter is set to None, or to an empty string,
-                      only word index are printed in the generated results.
-    :type dict_file: basestring
    :param num_results_per_sample: Number of the generated results per input
                                  sequence. This number must always be less than
                                  beam size.
    :type num_results_per_sample: int
-    :param id_input: Index of the input sequence, and the specified index will
-                     be prited in the gereated results. This an optional
-                     parameter.
-    :type id_input: LayerOutput
-    :return: The seq_text_printer that prints the generated sequence to a file.
-    :rtype: evaluator
+    :return: The generated word index.
+    :rtype: LayerOutput
    """

    if num_results_per_sample is None:
@ -2704,7 +2690,6 @@ def beam_search(step, input, bos_id, eos_id, beam_size,

    real_input = []
    for i, each_input in enumerate(input):
-        # print type(each_input)
        assert isinstance(each_input, StaticInput) or isinstance(
            each_input, BaseGeneratedInput)
        if isinstance(each_input, BaseGeneratedInput):
@ -2741,19 +2726,6 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
    tmp = recurrent_group(step=__real_step__, input=real_input, reverse=False,
                          name=name)
    
-    if id_input is None:
-        inputs = [tmp.name]
-    else:
-        assert isinstance(id_input, LayerOutput)
-        inputs = [id_input.name, tmp.name]
-        tmp.parents.append(id_input)
-
-    Evaluator(name='target_printer',
-              type='seq_text_printer',
-              dict_file=dict_file,
-              result_file=result_file,
-              inputs=inputs
-              )
    return tmp