From 60f706a1d6f497088f1957354910176e649059e8 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Tue, 10 Oct 2017 19:04:29 +0800
Subject: [PATCH 1/3] add SQRT strategy for sequence_pool_op

---
 paddle/operators/sequence_pool_op.cc            | 14 +++++------
 paddle/operators/sequence_pool_op.h             |  8 ++++++
 .../v2/framework/tests/test_seq_pool.py         | 25 +++++++++++++++++++
 3 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/paddle/operators/sequence_pool_op.cc b/paddle/operators/sequence_pool_op.cc
index 06c00d31ea..9b8d86b404 100644
--- a/paddle/operators/sequence_pool_op.cc
+++ b/paddle/operators/sequence_pool_op.cc
@@ -36,11 +36,9 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
   SequencePoolOpMaker(framework::OpProto* proto,
                       framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X",
-             "A float LoDTensor, the variable-length input of SequencePoolOp");
-    AddOutput(
-        "Out",
-        "A float LoDTensor, the variable-length output of SequencePoolOp.");
+    AddInput("X", "A LoDTensor, the variable-length input of SequencePoolOp");
+    AddOutput("Out",
+              "A LoDTensor, the variable-length output of SequencePoolOp.");
     AddAttr<int>(
         "strategy",
         "(int, default AVERAGE) the pooling strategy of SequencePoolOp.")
@@ -49,13 +47,13 @@
     AddComment(R"DOC(
     SequencePoolOp pools features of all time-steps of each instance.

-    For a mini-batch of 3 variable lengths sentences, containing 2, 3, and 2 time-steps:
+    For a mini-batch of 3 variable-length sentences, containing 2, 3, and 2 time-steps:

-    Assume X is a [7,M,N] float LoDTensor, and X->lod()[0] = [0, 2, 5, 7].
+    Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2.
     Besides, for the sake of simplicity, we assume M=1 and N=1, and the value of X = [[1, 3], [2, 4, 6], [5, 1]].

-    Thus, Out is a [3,1,1] float LoDTensor, but Out->lod() is nullptr.
+    Thus, Out is a [3,1,1] LoDTensor, but Out->lod() is nullptr.
     And for different strategy, the value of Out is as follows:

     - AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
diff --git a/paddle/operators/sequence_pool_op.h b/paddle/operators/sequence_pool_op.h
index 752d714125..fd056b71cf 100644
--- a/paddle/operators/sequence_pool_op.h
+++ b/paddle/operators/sequence_pool_op.h
@@ -77,6 +77,10 @@ class SequencePoolKernel : public framework::OpKernel<T> {
       case SUM:
         out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}}));
         break;
+      case SQRT:
+        out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
+                              std::sqrt(static_cast<T>(h));
+        break;
       default:
         PADDLE_THROW("unsupported pooling strategy");
     }
@@ -115,6 +119,10 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
       case SUM:
         in_g_e.device(place) = (out_g_e).broadcast(bcast);
         break;
+      case SQRT:
+        in_g_e.device(place) =
+            (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
+        break;
       default:
         PADDLE_THROW("unsupported pooling strategy");
     }
diff --git a/python/paddle/v2/framework/tests/test_seq_pool.py b/python/paddle/v2/framework/tests/test_seq_pool.py
index 211086e5f4..fbcf6dac93 100644
--- a/python/paddle/v2/framework/tests/test_seq_pool.py
+++ b/python/paddle/v2/framework/tests/test_seq_pool.py
@@ -82,5 +82,30 @@ class TestSeqSumPool2D(TestSeqAvgPool2D):
             out[i] = np.reshape(sub_x.sum(axis=0), (3, 17))


+class TestSeqSqrtPool(TestSeqAvgPool):
+    def compute(self):
+        self.attrs = {'strategy': SeqPoolType.SQRT}
+        x, lod = self.inputs['X']
+        out = self.outputs['Out']
+        for i in range(4):
+            sub_x = x[lod[0][i]:lod[0][i + 1], :]
+            len = lod[0][i + 1] - lod[0][i]
+            out[i] = sub_x.sum(axis=0) / np.sqrt(len)
+
+
+class TestSeqSqrtPool2D(TestSeqAvgPool2D):
+    def compute(self):
+        self.attrs = {'strategy': SeqPoolType.SQRT}
+        x, lod = self.inputs['X']
+        out = self.outputs['Out']
+        for i in range(4):
+            sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
+            len = lod[0][i + 1] - lod[0][i]
+            out[i] = np.reshape(sub_x.sum(axis=0) / np.sqrt(len), (3, 17))
+
+    def test_check_grad(self):
+        self.check_grad(["X"], "Out", max_relative_error=0.06)
+
+
 if __name__ == '__main__':
     unittest.main()

From 393c748c89049a7d9b8991266eeec09558395cc5 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Thu, 12 Oct 2017 19:35:46 +0800
Subject: [PATCH 2/3] add seqlastin/seqfirstin for seq_pool op

---
 paddle/operators/sequence_pool_op.h                | 17 ++++++++
 python/paddle/v2/framework/tests/test_seq_pool.py  | 40 +++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/paddle/operators/sequence_pool_op.h b/paddle/operators/sequence_pool_op.h
index fd056b71cf..8bfb80c33f 100644
--- a/paddle/operators/sequence_pool_op.h
+++ b/paddle/operators/sequence_pool_op.h
@@ -15,6 +15,7 @@ limitations under the License.
 */
 #pragma once
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/math_function.h"

 namespace paddle {
 namespace operators {
@@ -81,6 +82,12 @@ class SequencePoolKernel : public framework::OpKernel<T> {
         out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
                               std::sqrt(static_cast<T>(h));
         break;
+      case LAST:
+        out_e.device(place) = in_e.chip(h - 1, 0);
+        break;
+      case FIRST:
+        out_e.device(place) = in_e.chip(0, 0);
+        break;
       default:
         PADDLE_THROW("unsupported pooling strategy");
     }
@@ -102,6 +109,10 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
     int64_t w = in->numel() / dims[0];
     in_g->mutable_data<T>(context.GetPlace());
+    if (strategy > 2) {
+      // set X@Grad be zero at first when strategy is LAST/FIRST/MAX
+      math::SetConstant<Place, T>(context.device_context(), in_g, 0);
+    }
     auto place = context.GetEigenDevice<Place>();
     for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
       auto in_g_t = in_g->Slice(static_cast<int>(lod[i]),
@@ -123,6 +134,12 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
         in_g_e.device(place) =
             (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
         break;
+      case LAST:
+        in_g_e.chip(h - 1, 0).device(place) = out_g_e;
+        break;
+      case FIRST:
+        in_g_e.chip(0, 0).device(place) = out_g_e;
+        break;
       default:
         PADDLE_THROW("unsupported pooling strategy");
     }
diff --git a/python/paddle/v2/framework/tests/test_seq_pool.py b/python/paddle/v2/framework/tests/test_seq_pool.py
index fbcf6dac93..0ebf78bf8f 100644
--- a/python/paddle/v2/framework/tests/test_seq_pool.py
+++ b/python/paddle/v2/framework/tests/test_seq_pool.py
@@ -107,5 +107,45 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D):
         self.check_grad(["X"], "Out", max_relative_error=0.06)


+class TestSeqLastPool(TestSeqAvgPool):
+    def compute(self):
+        self.attrs = {'strategy': SeqPoolType.LAST}
+        x, lod = self.inputs['X']
+        out = self.outputs['Out']
+        for i in range(4):
+            sub_x = x[lod[0][i]:lod[0][i + 1], :]
+            out[i] = sub_x[-1, :]
+
+
+class TestSeqLastPool2D(TestSeqAvgPool2D):
+    def compute(self):
+        self.attrs = {'strategy': SeqPoolType.LAST}
+        x, lod = self.inputs['X']
+        out = self.outputs['Out']
+        for i in range(4):
+            sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
+            out[i] = np.reshape(sub_x[-1, :], (3, 17))
+
+
+class TestSeqFirstPool(TestSeqAvgPool):
+    def compute(self):
+        self.attrs = {'strategy': SeqPoolType.FIRST}
+        x, lod = self.inputs['X']
+        out = self.outputs['Out']
+        for i in range(4):
+            sub_x = x[lod[0][i]:lod[0][i + 1], :]
+            out[i] = sub_x[0, :]
+
+
+class TestSeqFirstPool2D(TestSeqAvgPool2D):
+    def compute(self):
+        self.attrs = {'strategy': SeqPoolType.FIRST}
+        x, lod = self.inputs['X']
+        out = self.outputs['Out']
+        for i in range(4):
+            sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
+            out[i] = np.reshape(sub_x[0, :], (3, 17))
+
+
 if __name__ == '__main__':
     unittest.main()

From 6a4282a20f1f9c110ea5aef5035a0b733da6db19 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Mon, 16 Oct 2017 20:02:04 +0800
Subject: [PATCH 3/3] refine comments of sequence_pool_op

---
 paddle/operators/sequence_pool_op.cc | 7 ++++---
 paddle/operators/sequence_pool_op.h  | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/paddle/operators/sequence_pool_op.cc b/paddle/operators/sequence_pool_op.cc
index 9b8d86b404..8dc4a59ba8 100644
--- a/paddle/operators/sequence_pool_op.cc
+++ b/paddle/operators/sequence_pool_op.cc
@@ -36,9 +36,10 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
   SequencePoolOpMaker(framework::OpProto* proto,
                       framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "A LoDTensor, the variable-length input of SequencePoolOp");
+    AddInput("X", "(LoDTensor), the variable-length input of SequencePoolOp");
     AddOutput("Out",
-              "A LoDTensor, the variable-length output of SequencePoolOp.");
+              "(Tensor), output of SequencePoolOp, which does not contain LoD "
+              "information.");
     AddAttr<int>(
         "strategy",
         "(int, default AVERAGE) the pooling strategy of SequencePoolOp.")
@@ -53,7 +54,7 @@
     Besides, for the sake of simplicity, we assume M=1 and N=1, and the value of X = [[1, 3], [2, 4, 6], [5, 1]].

-    Thus, Out is a [3,1,1] LoDTensor, but Out->lod() is nullptr.
+    Thus, Out is a [3,1,1] Tensor without LoD information.

     And for different strategy, the value of Out is as follows:

     - AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
diff --git a/paddle/operators/sequence_pool_op.h b/paddle/operators/sequence_pool_op.h
index 8bfb80c33f..ce68204d41 100644
--- a/paddle/operators/sequence_pool_op.h
+++ b/paddle/operators/sequence_pool_op.h
@@ -109,8 +109,8 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
     int64_t w = in->numel() / dims[0];
     in_g->mutable_data<T>(context.GetPlace());
-    if (strategy > 2) {
-      // set X@Grad be zero at first when strategy is LAST/FIRST/MAX
+    if (strategy == LAST || strategy == FIRST) {
+      // set X@Grad be zero at first when strategy is LAST/FIRST
       math::SetConstant<Place, T>(context.device_context(), in_g, 0);
     }
     auto place = context.GetEigenDevice<Place>();
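
Note: as a quick sanity check on the DOC example above (X = [[1, 3], [2, 4, 6], [5, 1]], X->lod()[0] = [0, 2, 5, 7]), here is a minimal NumPy sketch of the pooling strategies touched by these patches. It is illustrative only and not part of the patch: the seq_pool helper and the string strategy names are made up for this example, while the real operator selects the strategy through the integer 'strategy' attribute.

import numpy as np

# Flattened input of 3 sequences with lengths 2, 3 and 2 (M = N = 1),
# exactly the example in the SequencePoolOp DOC comment.
x = np.array([1., 3., 2., 4., 6., 5., 1.])
lod = [0, 2, 5, 7]  # sequence offsets, as in X->lod()[0]


def seq_pool(x, lod, strategy):
    # Reference pooling over each slice x[lod[i]:lod[i + 1]].
    out = []
    for i in range(len(lod) - 1):
        seq = x[lod[i]:lod[i + 1]]
        h = len(seq)  # number of time-steps in sequence i
        if strategy == 'AVERAGE':
            out.append(seq.sum(axis=0) / h)
        elif strategy == 'SUM':
            out.append(seq.sum(axis=0))
        elif strategy == 'SQRT':
            # mirrors: in_e.sum(...) / std::sqrt(static_cast<T>(h))
            out.append(seq.sum(axis=0) / np.sqrt(h))
        elif strategy == 'LAST':
            out.append(seq[-1])  # mirrors in_e.chip(h - 1, 0)
        elif strategy == 'FIRST':
            out.append(seq[0])   # mirrors in_e.chip(0, 0)
        else:
            raise ValueError('unsupported pooling strategy')
    return np.array(out)


print(seq_pool(x, lod, 'AVERAGE'))  # [2. 4. 3.], matching the DOC comment
print(seq_pool(x, lod, 'SQRT'))     # [4/sqrt(2), 12/sqrt(3), 6/sqrt(2)]
print(seq_pool(x, lod, 'LAST'))     # [3. 6. 1.]
print(seq_pool(x, lod, 'FIRST'))    # [1. 2. 5.]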
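
The LAST/FIRST branches also motivate the new SetConstant call in SequencePoolGradKernel: only one time-step per sequence receives the output gradient, so X@Grad has to be zero-filled before the per-sequence scatter. A matching NumPy sketch of the LAST backward rule, again illustrative only and using the same lod layout:

import numpy as np

# Backward of the LAST strategy for 3 sequences with offsets [0, 2, 5, 7].
lod = [0, 2, 5, 7]
d_out = np.array([10., 20., 30.])  # one gradient value per sequence

d_x = np.zeros(7)  # zero-fill first, as the kernel does for X@Grad
for i in range(len(lod) - 1):
    # only the last time-step of sequence i gets the gradient;
    # FIRST would write to index lod[i] instead of lod[i + 1] - 1
    d_x[lod[i + 1] - 1] = d_out[i]

print(d_x)  # [ 0. 10.  0.  0. 20.  0. 30.]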