fix sequence_project_op forward and backward

8 years ago · 834b82f109
parent 40688d223e
commit 834b82f109
3 changed files with 292 additions and 126 deletions
--- a/paddle/operators/sequence_project_op.cc
+++ b/paddle/operators/sequence_project_op.cc
@ -38,24 +38,23 @@ class SequenceProjectOp : public framework::OperatorWithKernel {
      PADDLE_ENFORCE(
          ctx->HasInput("PaddingData"),
          "Output(PaddingData) of SequenceProjectOp should not be null.");
-      framework::DDim padding_dim = ctx->GetOutputDim("PaddingData");
+      framework::DDim padding_dim = ctx->GetInputDim("PaddingData");
      int up_pad = std::max(0, -context_start);
      int down_pad = std::max(0, context_start + context_length - 1);
      int total_pad = up_pad + down_pad;
      int input_width = static_cast<int>(in_dims[1]);
      if (context_start == 0 && context_length == 1) {
        PADDLE_THROW(
            "if context_start == 0 && context_length == 1, padding_trainable "
            "should be false.");
      }
      PADDLE_ENFORCE(padding_dim.size() == 2,
                     "Input(PaddingData) should be 2-D tensor.");
      PADDLE_ENFORCE(
          padding_dim[0] == total_pad && padding_dim[1] == input_width,
          "Input(PaddingData)'s shape is not consistent with 'context_start' "
          "and 'context_length'.");
      if (context_start == 0 && context_length == 1) {
        PADDLE_THROW(
            "if context_start == 0 && context_length == 1, padding_trainable "
            "should be false.");
      }
    }
    in_dims[1] = in_dims[1] * context_length;
@ -74,9 +73,11 @@ class SequenceProjectGradOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE(ctx->HasInput("X"), "The input X should not be null.");
    if (ctx->Attrs().Get<bool>("padding_trainable")) {
-      PADDLE_ENFORCE(
+      PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("PaddingData")),
-          ctx->HasOutput("PaddingData"),
+                     "Output(PaddingData@GRAD) of SequenceProjectGradOp should "
-          "Output(PaddingData) of SequenceProjectOp should not be null.");
+                     "not be null.");
      auto padding_dims = ctx->GetInputDim("PaddingData");
      ctx->SetOutputDim(framework::GradVarName("PaddingData"), padding_dims);
    }
    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
  }
@ -93,8 +94,8 @@ class SequenceProjectOpMaker : public framework::OpProtoAndCheckerMaker {
    AddOutput(
        "Out",
        "A float LoDTensor, the variable-length output of SequenceProjectOp.");
-    AddOutput("PaddingData",
+    AddInput("PaddingData",  // PaddingData can be a float tensor
-              "A float LoDTensor, the padding data of SequenceProjectOp.");
+             "A float LoDTensor, the padding data of SequenceProjectOp.");
    AddAttr<bool>("padding_trainable",
                  "(bool, default false) the padding data of SequenceProjectOp "
@ -110,7 +111,8 @@ class SequenceProjectOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<int>("context_stride",
                 "(int, default 1) the xx of SequenceProjectOp.")
        .SetDefault(1)
-        .GreaterThan(0);
+        .GreaterThan(
            0);  // Currently, sequence_project_op only support context_stride=1
    AddComment(R"DOC(
    SequenceProjectOp projects features of context_length time-steps of each instance.
--- a/paddle/operators/sequence_project_op.h
+++ b/paddle/operators/sequence_project_op.h
--- a/python/paddle/v2/framework/tests/test_seq_project.py
+++ b/python/paddle/v2/framework/tests/test_seq_project.py
@ -1,5 +1,6 @@
 import unittest
 import numpy as np
 import random
 from op_test import OpTest
@ -10,18 +11,22 @@ class TestSeqProject(OpTest):
        # one level, batch size
        x = np.random.uniform(
            0.1, 1, [self.input_size[0], self.input_size[1]]).astype('float32')
        lod = [[0, 4, 5, 8, self.input_size[0]]]
        self.begin_pad = np.max([0, -self.context_start])
        self.end_pad = np.max([0, self.context_start + self.context_length - 1])
        self.total_pad = self.begin_pad + self.end_pad
-        w = np.ones((self.total_pad, self.input_size[1])) * 100
+        # w =  np.ones((self.total_pad, self.input_size[1])) * 100
-
+        w = np.array(range(self.total_pad * self.input_size[1]))
-        self.inputs = {'X': (x, lod), 'PaddingData': w}
+        w.shape = self.total_pad, self.input_size[1]
        self.inputs = {
            'X': (x, self.lod),
            'PaddingData': (w, [[0, self.total_pad]])
        }
        self.attrs = {
            'context_start': self.context_start,
            'context_length': self.context_length,
-            'padding_trainable': self.padding_trainable
+            'padding_trainable': self.padding_trainable,
            'context_stride': self.context_stride
        }
        out = np.zeros((self.input_size[0], self.input_size[1] *
                        self.context_length)).astype('float32')
@ -30,9 +35,10 @@ class TestSeqProject(OpTest):
    def compute(self):
        x, lod = self.inputs['X']
-        w = self.inputs['PaddingData']
+        w, _ = self.inputs['PaddingData']
        out = self.outputs['Out']
        lod = lod[0]
        begin_pad = np.max([0, -self.context_start])
        for i in range(len(lod) - 1):
            for j in range(self.context_length):
@ -43,22 +49,20 @@ class TestSeqProject(OpTest):
                if in_begin < lod[i]:
                    pad_size = np.min([lod[i] - in_begin, lod[i + 1] - lod[i]])
                    if self.padding_trainable:
-                        sub_w = w[j:pad_size, :]
+                        sub_w = w[j:j + pad_size, :]
                        out[lod[i]:lod[i] + pad_size, j * self.input_size[1]:(
                            j + 1) * self.input_size[1]] = sub_w
                        # pass
                    out_begin = lod[i] + pad_size
                    in_begin = lod[i]
                if in_end > lod[i + 1]:
                    pad_size = np.min(
                        [in_end - lod[i + 1], lod[i + 1] - lod[i]])
                    out_sub = out[lod[i + 1] - pad_size:lod[i + 1], :]
                    if self.padding_trainable:
-                        sub_w = w[j - pad_size:j, :]
+                        sub_w = w[begin_pad + self.context_start + j - pad_size:
                                  begin_pad + self.context_start + j, :]
                        out[lod[i + 1] - pad_size:lod[i + 1], j * self.
                            input_size[1]:(j + 1) * self.input_size[1]] = sub_w
                        # pass
                    in_end = lod[i + 1]
                    out_end = lod[i + 1] - pad_size
                if in_end <= in_begin:
@ -69,28 +73,105 @@ class TestSeqProject(OpTest):
                    self.input_size[1]] += in_sub
    def init_test_case(self):
-        self.input_size = [11, 23]
+        self.input_row = 11
        self.input_size = [self.input_row, 23]
        self.lod = [[0, 4, 5, 8, self.input_row]]
        self.op_type = "sequence_project"
        self.context_start = -1
        self.context_length = 3
-        self.padding_trainable = False
+        self.padding_trainable = True
        self.context_stride = 1
    def test_check_output(self):
        self.check_output()
    # def test_check_grad(self):
-    #     self.check_grad(["X"], "Out")
+    #     self.check_grad(
    #         set(['X', 'PaddingData']), 'Out', max_relative_error=0.05)
-    # class TestSeqAvgPool2D(TestSeqProject):
+    # def test_check_grad_no_filter(self):
-    #     def init_test_case(self):
+    #     self.check_grad(
-    #         self.input_size = [11, 23]
+    #         ['X'],
-    #         self.op_type = "sequence_project"
+    #         'Out',
    #         max_relative_error=0.05,
    #         no_grad_set=set(['PaddingData']))
    #
-    #         self.context_start = -1
+    # def test_check_grad_no_input(self):
-    #         self.context_length = 3
+    #     self.check_grad(
-    #         self.padding_trainable = True
+    #         ['PaddingData'],
    #         'Out',
    #         max_relative_error=0.05,
    #         no_grad_set=set(['X']))
 '''
 class TestSeqProjectCases(TestSeqProject):
    def setUp(self):
        self.init_test_case()
        self.op_type = 'sequence_project'
        num = 0
        for context_start in [-5, -3, -1, 0, 3]:
            for context_length in [1, 2, 5, 7]:
                for batch_size in [1, 2, 5, 7]:
                    for padding_trainable in [False, True]:
                        if context_length == 1 and context_start == 0 and padding_trainable:
                            continue
                        self.context_start = context_start
                        self.context_length = context_length
                        self.padding_trainable = padding_trainable
                        self.input_size = [batch_size, 23]
                        x = np.random.uniform(0.1, 1,
                                              self.input_size).astype('float32')
                        self.lod = [[0, self.input_size[0]]]
                        if self.input_size[0] > 2:
                            idx = range(self.input_size[0])
                            del idx[0]
                            self.lod = [
                                [0] + np.sort(random.sample(idx, 2)).tolist() +
                                [self.input_size[0]]
                            ]
                        self.begin_pad = np.max([0, -self.context_start])
                        self.end_pad = np.max(
                            [0, self.context_start + self.context_length - 1])
                        self.total_pad = self.begin_pad + self.end_pad
                        # w =  np.ones((self.total_pad, self.input_size[1])) * 100
                        w = np.array(range(self.total_pad * self.input_size[1]))
                        w.shape = self.total_pad, self.input_size[1]
                        if self.total_pad * self.input_size[1] == 0:
                            w = np.random.uniform(
                                0.1, 1,
                                (1, self.input_size[1])).astype('float32')
                            self.total_pad = 1
                        self.inputs = {
                            'X': (x, self.lod),
                            'PaddingData': (w, [[0, self.total_pad]])
                        }
                        self.attrs = {
                            'context_start': self.context_start,
                            'context_length': self.context_length,
                            'padding_trainable': self.padding_trainable,
                            'context_stride': self.context_stride
                        }
                        out = np.zeros((self.input_size[0], self.input_size[1] *
                                        self.context_length)).astype('float32')
                        self.outputs = {'Out': out}
                        print num
                        print self.attrs
                        print batch_size
                        print padding_trainable
                        print "$$$$$$$$$$$$$"
                        self.compute()
                        self.test_check_output()
                        num += 1
 '''
 if __name__ == '__main__':
    unittest.main()