Fix beam search bug (#29824)

* fix beam search bug

* add dygraph unittest

* update dynamic_decode argument doc

* add warning info for states which have no `lengths` attribute
Author: Jiaqi Liu (committed by GitHub)
Parent: f43e1d8c57
Commit: 2e8425b693

@@ -16,6 +16,7 @@ from __future__ import print_function
 
 import sys
 from functools import partial, reduce
+import warnings
 
 import paddle
 from paddle.utils import deprecated
@@ -1378,14 +1379,21 @@ def _dynamic_decode_imperative(decoder,
             # To confirm states.finished/finished be consistent with
             # next_finished.
             tensor.assign(next_finished, finished)
-        next_sequence_lengths = nn.elementwise_add(
-            sequence_lengths,
-            tensor.cast(
-                control_flow.logical_not(finished), sequence_lengths.dtype))
-        if impute_finished:  # rectify the states for the finished.
-            next_states = map_structure(
-                lambda x, y: _maybe_copy(x, y, finished), states, next_states)
+            next_sequence_lengths = nn.elementwise_add(
+                sequence_lengths,
+                tensor.cast(
+                    control_flow.logical_not(finished), sequence_lengths.dtype))
+            if impute_finished:  # rectify the states for the finished.
+                next_states = map_structure(
+                    lambda x, y: _maybe_copy(x, y, finished), states,
+                    next_states)
+        else:
+            warnings.warn(
+                "`next_states` has no `lengths` attribute, the returned `sequence_lengths` would be all zeros."
+            ) if not hasattr(next_states, "lengths") else None
+            next_sequence_lengths = getattr(next_states, "lengths",
+                                            sequence_lengths)
 
         outputs = map_structure(
             lambda x: ArrayWrapper(x),
             step_outputs) if step_idx == 0 else map_structure(
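
For readers skimming the diff: the imperative-mode change stops accumulating `sequence_lengths` unconditionally. When the decoder tracks its own finished state (as `BeamSearchDecoder` does), the lengths are now read from the decoder's returned states instead, with a warning if those states carry no `lengths` attribute. The snippet below is a simplified, self-contained sketch of the two paths; the `States` namedtuple and the numbers are illustrative stand-ins, not code from the patch.

    import collections
    import numpy as np

    # Stand-in for a decoder state structure that carries its own lengths.
    States = collections.namedtuple("States", ["log_probs", "lengths"])

    finished = np.array([False, True, False])             # per-entry finished flags
    sequence_lengths = np.array([3, 2, 3], dtype="int64")

    # Path 1: the decoder does NOT track its own finished state, so count one
    # more step for every entry that is still unfinished (the role of the
    # elementwise_add/cast/logical_not combination above).
    next_sequence_lengths = sequence_lengths + (~finished).astype("int64")

    # Path 2: the decoder DOES track its own finished state, so trust the
    # lengths carried in its states; without a `lengths` attribute the code
    # falls back to the zero-initialized lengths and emits the new warning.
    next_states = States(log_probs=np.zeros(3), lengths=np.array([4, 2, 4]))
    next_sequence_lengths = getattr(next_states, "lengths", sequence_lengths)
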
@@ -1500,17 +1508,22 @@ def _dynamic_decode_declarative(decoder,
             # finished.
             next_finished = control_flow.logical_or(next_finished,
                                                     global_finished)
-        next_sequence_lengths = nn.elementwise_add(
-            sequence_lengths,
-            tensor.cast(
-                control_flow.logical_not(global_finished),
-                sequence_lengths.dtype))
-
-        if impute_finished:  # rectify the states for the finished.
-            next_states = map_structure(
-                lambda x, y: _maybe_copy(x, y, global_finished),
-                states,
-                next_states, )
+            next_sequence_lengths = nn.elementwise_add(
+                sequence_lengths,
+                tensor.cast(
+                    control_flow.logical_not(global_finished),
+                    sequence_lengths.dtype))
+            if impute_finished:  # rectify the states for the finished.
+                next_states = map_structure(
+                    lambda x, y: _maybe_copy(x, y, global_finished),
+                    states,
+                    next_states, )
+        else:
+            warnings.warn(
+                "`next_states` has no `lengths` attribute, the returned `sequence_lengths` would be all zeros."
+            ) if not hasattr(next_states, "lengths") else None
+            next_sequence_lengths = getattr(next_states, "lengths",
+                                            sequence_lengths)
 
         # create tensor array in global block after dtype[s] of outputs can be got
         outputs_arrays = map_structure(
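
The declarative (graph-mode) branch mirrors the same logic. The `getattr(next_states, "lengths", ...)` fallback resolves for beam search because `BeamSearchDecoder` wraps its per-step state in a structure that exposes a `lengths` field; the stand-in below only assumes that attribute and is not the real wrapper class.

    import collections

    # Hypothetical stand-in for the decoder's state wrapper; only the
    # `lengths` attribute matters for the getattr fallback shown above.
    FakeStateWrapper = collections.namedtuple(
        "FakeStateWrapper", ("cell_states", "log_probs", "finished", "lengths"))

    state = FakeStateWrapper(None, None, None, lengths=[4, 2])
    print(getattr(state, "lengths", "all-zeros fallback"))     # -> [4, 2]
    print(getattr(object(), "lengths", "all-zeros fallback"))  # -> fallback, would warn
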
@@ -1595,13 +1608,13 @@ def dynamic_decode(decoder,
             attr:`False`, the data layout would be batch major with shape
             `[batch_size, seq_len, ...]`. If attr:`True`, the data layout would
             be time major with shape `[seq_len, batch_size, ...]`. Default: `False`.
-        impute_finished(bool, optional): If `True`, then states get copied through
-            for batch entries which are marked as finished, which differs with the
-            unfinished using the new states returned by :code:`decoder.step()` and
-            ensures that the final states have the correct values. Otherwise, states
-            wouldn't be copied through when finished. If the returned `final_states`
-            is needed, it should be set as True, which causes some slowdown.
-            Default `False`.
+        impute_finished(bool, optional): If `True` and `decoder.tracks_own_finished`
+            is False, then states get copied through for batch entries which are
+            marked as finished, which differs with the unfinished using the new states
+            returned by :code:`decoder.step()` and ensures that the final states have
+            the correct values. Otherwise, states wouldn't be copied through when
+            finished. If the returned `final_states` is needed, it should be set as
+            True, which causes some slowdown. Default `False`.
         is_test(bool, optional): A flag indicating whether to use test mode. In
             test mode, it is more memory saving. Default `False`.
         return_length(bool, optional): A flag indicating whether to return an
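
To connect the updated docstring to usage: below is a hedged, minimal sketch of beam-search decoding through the 2.x dygraph API (`paddle.nn.BeamSearchDecoder` with `paddle.nn.dynamic_decode`). All sizes and layers are placeholders; since `BeamSearchDecoder` tracks its own finished state, `impute_finished` is simply left at its default `False` and the returned lengths come from the decoder's states.

    import paddle
    import paddle.nn as nn

    paddle.disable_static()
    vocab_size, hidden_size, batch_size, beam_size = 100, 32, 8, 4

    cell = nn.LSTMCell(input_size=hidden_size, hidden_size=hidden_size)
    decoder = nn.BeamSearchDecoder(
        cell,
        start_token=0,
        end_token=1,
        beam_size=beam_size,
        embedding_fn=nn.Embedding(vocab_size, hidden_size),
        output_fn=nn.Linear(hidden_size, vocab_size))

    # Un-tiled initial cell states; BeamSearchDecoder expands them per beam.
    init_states = cell.get_initial_states(paddle.zeros([batch_size, hidden_size]))

    # With return_length=True, the fixed paths above return the lengths
    # tracked by the decoder itself rather than all zeros.
    outputs, final_states, sequence_lengths = nn.dynamic_decode(
        decoder, inits=init_states, max_step_num=10, return_length=True)
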

@@ -178,16 +178,14 @@ class Seq2SeqModel(object):
                  beam_size=4):
         self.start_token, self.end_token = start_token, end_token
         self.max_decoding_length, self.beam_size = max_decoding_length, beam_size
-        self.src_embeder = lambda x: fluid.embedding(
-            input=x,
-            size=[src_vocab_size, hidden_size],
-            dtype="float32",
-            param_attr=fluid.ParamAttr(name="source_embedding"))
-        self.trg_embeder = lambda x: fluid.embedding(
-            input=x,
-            size=[trg_vocab_size, hidden_size],
-            dtype="float32",
-            param_attr=fluid.ParamAttr(name="target_embedding"))
+        self.src_embeder = paddle.nn.Embedding(
+            src_vocab_size,
+            hidden_size,
+            weight_attr=fluid.ParamAttr(name="source_embedding"))
+        self.trg_embeder = paddle.nn.Embedding(
+            trg_vocab_size,
+            hidden_size,
+            weight_attr=fluid.ParamAttr(name="target_embedding"))
         self.encoder = Encoder(num_layers, hidden_size, dropout_prob)
         self.decoder = Decoder(num_layers, hidden_size, dropout_prob,
                                decoding_strategy, max_decoding_length)
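
A side note on this test refactor (the identifiers below are illustrative, not from the patch): `fluid.embedding` was a functional op configured through `size`/`dtype`/`param_attr`, hence the lambda wrapper, while `paddle.nn.Embedding` is a layer object that is constructed once and then called directly on the id tensor.

    import paddle
    import paddle.fluid as fluid

    embed = paddle.nn.Embedding(
        10, 4, weight_attr=fluid.ParamAttr(name="demo_embedding"))  # 10 ids, dim 4
    ids = paddle.to_tensor([[1, 2, 3]], dtype="int64")
    vectors = embed(ids)  # shape [1, 3, 4]; no lambda wrapper needed
    print(vectors.shape)
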
@@ -195,7 +193,7 @@ class Seq2SeqModel(object):
             x,
             size=trg_vocab_size,
             num_flatten_dims=len(x.shape) - 1,
-            param_attr=fluid.ParamAttr(name="output_w"),
+            param_attr=fluid.ParamAttr(),
             bias_attr=False)
 
     def __call__(self, src, src_length, trg=None, trg_length=None):
@@ -556,6 +554,14 @@ class TestDynamicDecode(unittest.TestCase):
                 },
                 fetch_list=[output])[0]
 
+    def test_dynamic_basic_decoder(self):
+        paddle.disable_static()
+        src = paddle.to_tensor(np.random.randint(8, size=(8, 4)))
+        src_length = paddle.to_tensor(np.random.randint(8, size=(8)))
+        model = Seq2SeqModel(**self.model_hparams)
+        probs, samples, sample_length = model(src, src_length)
+        paddle.enable_static()
+
 
 class ModuleApiTest(unittest.TestCase):
     @classmethod
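
To exercise only the new dygraph case locally, something like the following should work (hedged: the module name `test_rnn_decode_api` is inferred from the surrounding classes and may differ):

    import unittest

    # Load and run a single test method by dotted name; run this from the
    # directory that contains the test file so the module can be imported.
    suite = unittest.defaultTestLoader.loadTestsFromName(
        "test_rnn_decode_api.TestDynamicDecode.test_dynamic_basic_decoder")
    unittest.TextTestRunner(verbosity=2).run(suite)
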
@@ -672,8 +678,8 @@ class TestBeamSearch(ModuleApiTest):
                  hidden_size,
                  bos_id=0,
                  eos_id=1,
-                 beam_size=2,
-                 max_step_num=2):
+                 beam_size=4,
+                 max_step_num=20):
         embedder = paddle.fluid.dygraph.Embedding(
             size=[vocab_size, embed_dim], dtype="float64")
         output_layer = nn.Linear(hidden_size, vocab_size)
