From 326fa176ea6401f171e9325aa29fb0b5cf6f7a29 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Sun, 4 Feb 2018 22:45:47 +0800 Subject: [PATCH 1/3] Fix empty output tensor and add an unitest case --- paddle/operators/ctc_align_op.cu | 8 ++++++++ paddle/operators/ctc_align_op.h | 9 ++++++++- python/paddle/v2/fluid/tests/test_ctc_align.py | 11 +++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/paddle/operators/ctc_align_op.cu b/paddle/operators/ctc_align_op.cu index 2a970cd9fa..918df83eff 100644 --- a/paddle/operators/ctc_align_op.cu +++ b/paddle/operators/ctc_align_op.cu @@ -80,6 +80,14 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel { // resize output dims output->Resize({static_cast(host_out_lod0.back()), 1}); + + if (host_out_lod0.back() == 0) { + output->Resize({1}); + output->mutable_data(ctx.GetPlace()); + math::SetConstant set_constant; + set_constant(ctx.template device_context(), + output, -1); + } } }; diff --git a/paddle/operators/ctc_align_op.h b/paddle/operators/ctc_align_op.h index fed89aa1e8..7a063870f3 100644 --- a/paddle/operators/ctc_align_op.h +++ b/paddle/operators/ctc_align_op.h @@ -16,6 +16,8 @@ limitations under the License. */ #include #include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" + namespace paddle { namespace operators { @@ -65,9 +67,14 @@ class CTCAlignKernel : public framework::OpKernel { framework::LoD output_lod; output_lod.push_back(output_lod0); output->set_lod(output_lod); - // resize output dims output->Resize({static_cast(output_lod0.back()), 1}); + // for empty sequence + if (output_lod0.back() == 0) { + output->Resize({1}); + output_data = output->mutable_data(ctx.GetPlace()); + output_data[0] = -1; + } } }; diff --git a/python/paddle/v2/fluid/tests/test_ctc_align.py b/python/paddle/v2/fluid/tests/test_ctc_align.py index 773c69d1ad..cc815d8e9e 100644 --- a/python/paddle/v2/fluid/tests/test_ctc_align.py +++ b/python/paddle/v2/fluid/tests/test_ctc_align.py @@ -31,6 +31,8 @@ def CTCAlign(input, lod, blank, merge_repeated): result.append(token) prev_token = token result = np.array(result).reshape([len(result), 1]).astype("int32") + if len(result) == 0: + result = np.array([-1]) return result @@ -72,5 +74,14 @@ class TestCTCAlignOpCase1(TestCTCAlignOp): [19, 1]).astype("int32") +class TestCTCAlignOpCase2(TestCTCAlignOp): + def config(self): + self.op_type = "ctc_align" + self.input_lod = [[0, 4]] + self.blank = 0 + self.merge_repeated = True + self.input = np.array([0, 0, 0, 0]).reshape([4, 1]).astype("int32") + + if __name__ == "__main__": unittest.main() From 863cd9c766e30b487d88ddd0b797a3b59a421282 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Tue, 6 Feb 2018 09:54:14 +0800 Subject: [PATCH 2/3] Add comments to explain the empty result --- python/paddle/v2/fluid/layers/nn.py | 39 +++++++++++++++-------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index a79479f469..2209625344 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -410,12 +410,12 @@ def dynamic_lstmp(input, """ **Dynamic LSTMP Layer** - LSTMP (LSTM with recurrent projection) layer has a separate projection - layer after the LSTM layer, projecting the original hidden state to a - lower-dimensional one, which is proposed to reduce the number of total - parameters and furthermore computational complexity for the LSTM, - espeacially for the case that the size of output units is relative - large (https://research.google.com/pubs/archive/43905.pdf). + LSTMP (LSTM with recurrent projection) layer has a separate projection + layer after the LSTM layer, projecting the original hidden state to a + lower-dimensional one, which is proposed to reduce the number of total + parameters and furthermore computational complexity for the LSTM, + espeacially for the case that the size of output units is relative + large (https://research.google.com/pubs/archive/43905.pdf). The formula is as follows: @@ -441,27 +441,27 @@ def dynamic_lstmp(input, the matrix of weights from the input gate to the input). * :math:`W_{ic}`, :math:`W_{fc}`, :math:`W_{oc}`: Diagonal weight \ matrices for peephole connections. In our implementation, \ - we use vectors to reprenset these diagonal weight matrices. + we use vectors to reprenset these diagonal weight matrices. * :math:`b`: Denotes bias vectors (e.g. :math:`b_i` is the input gate \ - bias vector). + bias vector). * :math:`\sigma`: The activation, such as logistic sigmoid function. * :math:`i, f, o` and :math:`c`: The input gate, forget gate, output \ gate, and cell activation vectors, respectively, all of which have \ - the same size as the cell output activation vector :math:`h`. + the same size as the cell output activation vector :math:`h`. * :math:`h`: The hidden state. - * :math:`r`: The recurrent projection of the hidden state. + * :math:`r`: The recurrent projection of the hidden state. * :math:`\\tilde{c_t}`: The candidate hidden state, whose \ computation is based on the current input and previous hidden state. - * :math:`\odot`: The element-wise product of the vectors. + * :math:`\odot`: The element-wise product of the vectors. * :math:`act_g` and :math:`act_h`: The cell input and cell output \ - activation functions and `tanh` is usually used for them. + activation functions and `tanh` is usually used for them. * :math:`\overline{act_h}`: The activation function for the projection \ output, usually using `identity` or same as :math:`act_h`. Set `use_peepholes` to `False` to disable peephole connection. The formula is omitted here, please refer to the paper http://www.bioinf.jku.at/publications/older/2604.pdf for details. - + Note that these :math:`W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}` operations on the input :math:`x_{t}` are NOT included in this operator. Users can choose to use fully-connected layer before LSTMP layer. @@ -479,8 +479,8 @@ def dynamic_lstmp(input, - Hidden-hidden weight = {:math:`W_{ch}, W_{ih}, \ W_{fh}, W_{oh}`}. - - The shape of hidden-hidden weight is (P x 4D), - where P is the projection size and D the hidden + - The shape of hidden-hidden weight is (P x 4D), + where P is the projection size and D the hidden size. - Projection weight = {:math:`W_{rh}`}. - The shape of projection weight is (D x P). @@ -525,9 +525,9 @@ def dynamic_lstmp(input, hidden_dim, proj_dim = 512, 256 fc_out = fluid.layers.fc(input=input_seq, size=hidden_dim * 4, act=None, bias_attr=None) - proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out, - size=hidden_dim * 4, - proj_size=proj_dim, + proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out, + size=hidden_dim * 4, + proj_size=proj_dim, use_peepholes=False, is_reverse=True, cell_activation="tanh", @@ -2525,7 +2525,8 @@ def ctc_greedy_decoder(input, blank, name=None): interval [0, num_classes + 1). Returns: - Variable: CTC greedy decode result. + Variable: CTC greedy decode result. If all the sequences in result were + empty, the result LoDTensor will be [-1] with LoD [[0]] and dims [1]. Examples: .. code-block:: python From 3aae78159b6b9cd12f2a60b071c7e86abf45e7ee Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Tue, 6 Feb 2018 16:36:31 +0800 Subject: [PATCH 3/3] Change the dims of empty result to [1, 1] --- paddle/operators/ctc_align_op.cu | 2 +- paddle/operators/ctc_align_op.h | 2 +- python/paddle/v2/fluid/layers/nn.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/operators/ctc_align_op.cu b/paddle/operators/ctc_align_op.cu index 918df83eff..cea595d7c5 100644 --- a/paddle/operators/ctc_align_op.cu +++ b/paddle/operators/ctc_align_op.cu @@ -82,7 +82,7 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel { output->Resize({static_cast(host_out_lod0.back()), 1}); if (host_out_lod0.back() == 0) { - output->Resize({1}); + output->Resize({1, 1}); output->mutable_data(ctx.GetPlace()); math::SetConstant set_constant; set_constant(ctx.template device_context(), diff --git a/paddle/operators/ctc_align_op.h b/paddle/operators/ctc_align_op.h index 7a063870f3..54ad1d6f5c 100644 --- a/paddle/operators/ctc_align_op.h +++ b/paddle/operators/ctc_align_op.h @@ -71,7 +71,7 @@ class CTCAlignKernel : public framework::OpKernel { output->Resize({static_cast(output_lod0.back()), 1}); // for empty sequence if (output_lod0.back() == 0) { - output->Resize({1}); + output->Resize({1, 1}); output_data = output->mutable_data(ctx.GetPlace()); output_data[0] = -1; } diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 2209625344..0b3b56bc22 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -2526,7 +2526,7 @@ def ctc_greedy_decoder(input, blank, name=None): Returns: Variable: CTC greedy decode result. If all the sequences in result were - empty, the result LoDTensor will be [-1] with LoD [[0]] and dims [1]. + empty, the result LoDTensor will be [-1] with LoD [[0]] and dims [1, 1]. Examples: .. code-block:: python