From 326fa176ea6401f171e9325aa29fb0b5cf6f7a29 Mon Sep 17 00:00:00 2001
From: wanghaoshuang <wanghaoshuang@baidu.com>
Date: Sun, 4 Feb 2018 22:45:47 +0800
Subject: [PATCH 1/3] Fix empty output tensor and add a unit test case

---
 paddle/operators/ctc_align_op.cu               |  8 ++++++++
 paddle/operators/ctc_align_op.h                |  9 ++++++++-
 python/paddle/v2/fluid/tests/test_ctc_align.py | 11 +++++++++++
 3 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/paddle/operators/ctc_align_op.cu b/paddle/operators/ctc_align_op.cu
index 2a970cd9fa..918df83eff 100644
--- a/paddle/operators/ctc_align_op.cu
+++ b/paddle/operators/ctc_align_op.cu
@@ -80,6 +80,14 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel<T> {
 
     // resize output dims
     output->Resize({static_cast<int64_t>(host_out_lod0.back()), 1});
+
+    if (host_out_lod0.back() == 0) {
+      output->Resize({1});
+      output->mutable_data<T>(ctx.GetPlace());
+      math::SetConstant<platform::CUDADeviceContext, T> set_constant;
+      set_constant(ctx.template device_context<platform::CUDADeviceContext>(),
+                   output, -1);
+    }
   }
 };
 
diff --git a/paddle/operators/ctc_align_op.h b/paddle/operators/ctc_align_op.h
index fed89aa1e8..7a063870f3 100644
--- a/paddle/operators/ctc_align_op.h
+++ b/paddle/operators/ctc_align_op.h
@@ -16,6 +16,8 @@ limitations under the License. */
 
 #include <string.h>
 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/math_function.h"
+
 namespace paddle {
 namespace operators {
 
@@ -65,9 +67,14 @@ class CTCAlignKernel : public framework::OpKernel<T> {
     framework::LoD output_lod;
     output_lod.push_back(output_lod0);
     output->set_lod(output_lod);
-
     // resize output dims
     output->Resize({static_cast<int64_t>(output_lod0.back()), 1});
+    // for empty sequence
+    if (output_lod0.back() == 0) {
+      output->Resize({1});
+      output_data = output->mutable_data<T>(ctx.GetPlace());
+      output_data[0] = -1;
+    }
   }
 };
 
diff --git a/python/paddle/v2/fluid/tests/test_ctc_align.py b/python/paddle/v2/fluid/tests/test_ctc_align.py
index 773c69d1ad..cc815d8e9e 100644
--- a/python/paddle/v2/fluid/tests/test_ctc_align.py
+++ b/python/paddle/v2/fluid/tests/test_ctc_align.py
@@ -31,6 +31,8 @@ def CTCAlign(input, lod, blank, merge_repeated):
                 result.append(token)
             prev_token = token
     result = np.array(result).reshape([len(result), 1]).astype("int32")
+    if len(result) == 0:
+        result = np.array([-1])
     return result
 
 
@@ -72,5 +74,14 @@ class TestCTCAlignOpCase1(TestCTCAlignOp):
                 [19, 1]).astype("int32")
 
 
+class TestCTCAlignOpCase2(TestCTCAlignOp):
+    def config(self):
+        self.op_type = "ctc_align"
+        self.input_lod = [[0, 4]]
+        self.blank = 0
+        self.merge_repeated = True
+        self.input = np.array([0, 0, 0, 0]).reshape([4, 1]).astype("int32")
+
+
 if __name__ == "__main__":
     unittest.main()

From 863cd9c766e30b487d88ddd0b797a3b59a421282 Mon Sep 17 00:00:00 2001
From: wanghaoshuang <wanghaoshuang@baidu.com>
Date: Tue, 6 Feb 2018 09:54:14 +0800
Subject: [PATCH 2/3] Add comments to explain the empty result

---
 python/paddle/v2/fluid/layers/nn.py | 39 +++++++++++++++--------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index a79479f469..2209625344 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -410,12 +410,12 @@ def dynamic_lstmp(input,
     """
     **Dynamic LSTMP Layer**
 
-    LSTMP (LSTM with recurrent projection) layer has a separate projection 
-    layer after the LSTM layer, projecting the original hidden state to a 
-    lower-dimensional one, which is proposed to reduce the number of total 
-    parameters and furthermore computational complexity for the LSTM, 
-    espeacially for the case that the size of output units is relative 
-    large (https://research.google.com/pubs/archive/43905.pdf). 
+    LSTMP (LSTM with recurrent projection) layer has a separate projection
+    layer after the LSTM layer, projecting the original hidden state to a
+    lower-dimensional one, which is proposed to reduce the number of total
+    parameters and furthermore computational complexity for the LSTM,
+    especially for the case that the size of output units is relatively
+    large (https://research.google.com/pubs/archive/43905.pdf).
 
     The formula is as follows:
 
@@ -441,27 +441,27 @@ def dynamic_lstmp(input,
           the matrix of weights from the input gate to the input).
     * :math:`W_{ic}`, :math:`W_{fc}`, :math:`W_{oc}`: Diagonal weight \
           matrices for peephole connections. In our implementation, \
-          we use vectors to reprenset these diagonal weight matrices. 
+          we use vectors to represent these diagonal weight matrices.
     * :math:`b`: Denotes bias vectors (e.g. :math:`b_i` is the input gate \
-          bias vector). 
+          bias vector).
     * :math:`\sigma`: The activation, such as logistic sigmoid function.
     * :math:`i, f, o` and :math:`c`: The input gate, forget gate, output \
           gate, and cell activation vectors, respectively, all of which have \
-          the same size as the cell output activation vector :math:`h`. 
+          the same size as the cell output activation vector :math:`h`.
     * :math:`h`: The hidden state.
-    * :math:`r`: The recurrent projection of the hidden state. 
+    * :math:`r`: The recurrent projection of the hidden state.
     * :math:`\\tilde{c_t}`: The candidate hidden state, whose \
           computation is based on the current input and previous hidden state.
-    * :math:`\odot`: The element-wise product of the vectors. 
+    * :math:`\odot`: The element-wise product of the vectors.
     * :math:`act_g` and :math:`act_h`: The cell input and cell output \
-          activation functions and `tanh` is usually used for them. 
+          activation functions and `tanh` is usually used for them.
     * :math:`\overline{act_h}`: The activation function for the projection \
           output, usually using `identity` or same as :math:`act_h`.
 
     Set `use_peepholes` to `False` to disable peephole connection. The formula
     is omitted here, please refer to the paper
     http://www.bioinf.jku.at/publications/older/2604.pdf for details.
-    
+
     Note that these :math:`W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}`
     operations on the input :math:`x_{t}` are NOT included in this operator.
     Users can choose to use fully-connected layer before LSTMP layer.
@@ -479,8 +479,8 @@ def dynamic_lstmp(input,
 
                                - Hidden-hidden weight = {:math:`W_{ch}, W_{ih}, \
                                                 W_{fh}, W_{oh}`}.
-                               - The shape of hidden-hidden weight is (P x 4D), 
-                                 where P is the projection size and D the hidden 
+                               - The shape of hidden-hidden weight is (P x 4D),
+                                 where P is the projection size and D the hidden
                                  size.
                                - Projection weight = {:math:`W_{rh}`}.
                                - The shape of projection weight is (D x P).
@@ -525,9 +525,9 @@ def dynamic_lstmp(input,
             hidden_dim, proj_dim = 512, 256
             fc_out = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
                                      act=None, bias_attr=None)
-            proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out, 
-                                                     size=hidden_dim * 4, 
-                                                     proj_size=proj_dim, 
+            proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
+                                                     size=hidden_dim * 4,
+                                                     proj_size=proj_dim,
                                                      use_peepholes=False,
                                                      is_reverse=True,
                                                      cell_activation="tanh",
@@ -2525,7 +2525,8 @@ def ctc_greedy_decoder(input, blank, name=None):
                     interval [0, num_classes + 1).
 
     Returns:
-        Variable: CTC greedy decode result.
+        Variable: CTC greedy decode result. If all the sequences in result were
+        empty, the result LoDTensor will be [-1] with LoD [[0]] and dims [1].
 
     Examples:
         .. code-block:: python

From 3aae78159b6b9cd12f2a60b071c7e86abf45e7ee Mon Sep 17 00:00:00 2001
From: wanghaoshuang <wanghaoshuang@baidu.com>
Date: Tue, 6 Feb 2018 16:36:31 +0800
Subject: [PATCH 3/3] Change the dims of empty result to [1, 1]

---
 paddle/operators/ctc_align_op.cu    | 2 +-
 paddle/operators/ctc_align_op.h     | 2 +-
 python/paddle/v2/fluid/layers/nn.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/paddle/operators/ctc_align_op.cu b/paddle/operators/ctc_align_op.cu
index 918df83eff..cea595d7c5 100644
--- a/paddle/operators/ctc_align_op.cu
+++ b/paddle/operators/ctc_align_op.cu
@@ -82,7 +82,7 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel<T> {
     output->Resize({static_cast<int64_t>(host_out_lod0.back()), 1});
 
     if (host_out_lod0.back() == 0) {
-      output->Resize({1});
+      output->Resize({1, 1});
       output->mutable_data<T>(ctx.GetPlace());
       math::SetConstant<platform::CUDADeviceContext, T> set_constant;
       set_constant(ctx.template device_context<platform::CUDADeviceContext>(),
diff --git a/paddle/operators/ctc_align_op.h b/paddle/operators/ctc_align_op.h
index 7a063870f3..54ad1d6f5c 100644
--- a/paddle/operators/ctc_align_op.h
+++ b/paddle/operators/ctc_align_op.h
@@ -71,7 +71,7 @@ class CTCAlignKernel : public framework::OpKernel<T> {
     output->Resize({static_cast<int64_t>(output_lod0.back()), 1});
     // for empty sequence
     if (output_lod0.back() == 0) {
-      output->Resize({1});
+      output->Resize({1, 1});
       output_data = output->mutable_data<T>(ctx.GetPlace());
       output_data[0] = -1;
     }
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 2209625344..0b3b56bc22 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -2526,7 +2526,7 @@ def ctc_greedy_decoder(input, blank, name=None):
 
     Returns:
         Variable: CTC greedy decode result. If all the sequences in result were
-        empty, the result LoDTensor will be [-1] with LoD [[0]] and dims [1].
+        empty, the result LoDTensor will be [-1] with LoD [[0]] and dims [1, 1].
 
     Examples:
         .. code-block:: python