support 2-level lod of input in sequence_pool (#19839)

* support 2-level lod of input in sequence_pool test=develop

* fix lod level bug in .cu test=develop
Aurelius84 committed via GitHub
parent b25d1e758d
commit fcf53e55ff
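For context: a LoDTensor stores variable-length sequences as a flat data tensor plus one or more levels of offset vectors. With a 2-level LoD, level 0 groups the level-1 sequences, and this change makes every pooling functor iterate over the last (finest) level. A minimal standalone sketch with made-up offsets and no Paddle dependencies:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical 2-level LoD: 2 "documents" holding 3 "sentences"
  // of 2, 3, and 4 rows respectively.
  std::vector<std::vector<std::size_t>> lod = {
      {0, 2, 3},      // level 0: document -> sentence offsets
      {0, 2, 5, 9}};  // level 1 (last): sentence -> row offsets
  std::size_t lod_level = lod.size();
  // The patch pools over the finest granularity: lod[lod_level - 1].
  const auto& starts = lod[lod_level - 1];
  for (std::size_t i = 0; i + 1 < starts.size(); ++i) {
    std::cout << "sequence " << i << ": rows [" << starts[i] << ", "
              << starts[i + 1] << ")\n";
  }
  return 0;
}
```

Note that lod[0].back() == lod[1].size() - 1 holds here; that is exactly the consistency condition the kernel enforces further down.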

@@ -37,18 +37,23 @@ class MaxSeqPoolFunctor {
  public:
   void operator()(const platform::CPUDeviceContext& context,
                   const framework::LoDTensor& input, T pad_value,
-                  framework::Tensor* output, framework::Tensor* index) {
+                  framework::LoDTensor* output, framework::Tensor* index) {
     auto in_dims = input.dims();
     auto out_dims = output->dims();
     auto idx_dims = index->dims();
-    PADDLE_ENFORCE_GT(in_dims.size(), 1);
-    PADDLE_ENFORCE_GT(out_dims.size(), 1);
+    PADDLE_ENFORCE_GT(in_dims.size(), 1,
+                      "The rank of input shall be greater than 1.");
+    PADDLE_ENFORCE_GT(out_dims.size(), 1,
+                      "The rank of output shall be greater than 1.");
     for (int64_t i = 1; i < in_dims.size(); ++i) {
-      PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]);
+      PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i],
+                        "The dimension of input and output shall be same.");
     }
-    PADDLE_ENFORCE_EQ(idx_dims, out_dims);
+    PADDLE_ENFORCE_EQ(idx_dims, out_dims,
+                      "The dimension of index and output shall be same.");
-    auto starts = input.lod()[0];
+    auto lod_level = input.lod().size();
+    auto starts = input.lod()[lod_level - 1];
     const T* in_data = input.data<T>();
     T* out_data = output->data<T>();
     int* max_index = index->data<int>();
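For reference, a self-contained sketch of the per-sequence computation MaxSeqPoolFunctor performs after this change, assuming row-major [rows, width] data and the last-level offsets from the sketch above; the names here are illustrative, not Paddle APIs:

```cpp
#include <cstddef>
#include <vector>

// Max-pool each sequence [starts[i], starts[i+1]) column-wise, record
// the argmax row in `index`, and leave pad_value / -1 for empty ones.
template <typename T>
void MaxSeqPoolRef(const std::vector<T>& in, int width,
                   const std::vector<std::size_t>& starts, T pad_value,
                   std::vector<T>* out, std::vector<int>* index) {
  std::size_t seq_num = starts.size() - 1;
  out->assign(seq_num * width, pad_value);
  index->assign(seq_num * width, -1);
  for (std::size_t i = 0; i < seq_num; ++i) {
    for (std::size_t r = starts[i]; r < starts[i + 1]; ++r) {
      for (int k = 0; k < width; ++k) {
        T v = in[r * width + k];
        if (r == starts[i] || v > (*out)[i * width + k]) {
          (*out)[i * width + k] = v;                      // running max
          (*index)[i * width + k] = static_cast<int>(r);  // argmax row
        }
      }
    }
  }
}
```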
@@ -85,16 +90,20 @@ class MaxSeqPoolFunctor<T, true> {
  public:
   void operator()(const platform::CPUDeviceContext& context,
                   const framework::LoDTensor& input, T pad_value,
-                  framework::Tensor* output, framework::Tensor* index) {
+                  framework::LoDTensor* output, framework::Tensor* index) {
     auto in_dims = input.dims();
     auto out_dims = output->dims();
-    PADDLE_ENFORCE_GT(in_dims.size(), 1);
-    PADDLE_ENFORCE_GT(out_dims.size(), 1);
+    PADDLE_ENFORCE_GT(in_dims.size(), 1,
+                      "The rank of input shall be greater than 1.");
+    PADDLE_ENFORCE_GT(out_dims.size(), 1,
+                      "The rank of output shall be greater than 1.");
     for (int64_t i = 1; i < in_dims.size(); ++i) {
-      PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]);
+      PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i],
+                        "The dimension of input and output shall be same.");
     }
-    auto starts = input.lod()[0];
+    auto lod_level = input.lod().size();
+    auto starts = input.lod()[lod_level - 1];
     const T* in_data = input.data<T>();
     T* out_data = output->data<T>();
@@ -123,18 +132,23 @@ template <typename T>
 class MaxSeqPoolGradFunctor {
  public:
   void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& out_grad,
+                  const framework::LoDTensor& out_grad,
                   const framework::Tensor& index,
                   framework::LoDTensor* in_grad) {
     auto og_dims = out_grad.dims();
     auto ig_dims = in_grad->dims();
     auto idx_dims = index.dims();
-    PADDLE_ENFORCE_GT(og_dims.size(), 1);
-    PADDLE_ENFORCE_GT(ig_dims.size(), 1);
+    PADDLE_ENFORCE_GT(og_dims.size(), 1,
+                      "The rank of output@Grad shall be greater than 1.");
+    PADDLE_ENFORCE_GT(ig_dims.size(), 1,
+                      "The rank of input@Grad shall be greater than 1.");
     for (int64_t i = 1; i < og_dims.size(); ++i) {
-      PADDLE_ENFORCE_EQ(og_dims[i], ig_dims[i]);
+      PADDLE_ENFORCE_EQ(
+          og_dims[i], ig_dims[i],
+          "The dimension of input@Grad and output@Grad shall be same.");
     }
-    PADDLE_ENFORCE_EQ(idx_dims, og_dims);
+    PADDLE_ENFORCE_EQ(idx_dims, og_dims,
+                      "The dimension of index and output@Grad shall be same.");
     const T* og_data = out_grad.data<T>();
     const int* max_index = index.data<int>();
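The backward rule this functor implements is pure routing: each out_grad entry flows to the single input row recorded in `index`. A hedged sketch under the same layout assumptions as the forward sketch:

```cpp
#include <cstddef>
#include <vector>

// Scatter out_grad back to the argmax rows; everything else stays 0.
template <typename T>
void MaxSeqPoolGradRef(const std::vector<T>& out_grad, int width,
                       const std::vector<int>& index, std::size_t in_rows,
                       std::vector<T>* in_grad) {
  in_grad->assign(in_rows * width, T(0));
  std::size_t seq_num = out_grad.size() / width;
  for (std::size_t i = 0; i < seq_num; ++i) {
    for (int k = 0; k < width; ++k) {
      int r = index[i * width + k];
      if (r >= 0) {  // -1 marks an empty (padded) sequence
        (*in_grad)[r * width + k] = out_grad[i * width + k];
      }
    }
  }
}
```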
@@ -159,14 +173,15 @@ class LastSeqPoolFunctor {
  public:
   void operator()(const platform::CPUDeviceContext& context,
                   const framework::LoDTensor& input, T pad_value,
-                  framework::Tensor* output) {
+                  framework::LoDTensor* output) {
     // Create pointers to input and output data
     auto* in_data = input.data<T>();
     auto* out_data = output->data<T>();
     // Calculate the size of each item in sequence
     int64_t item_size = input.numel() / input.dims()[0];
-    auto lod = input.lod()[0];
+    auto lod_level = input.lod().size();
+    auto lod = input.lod()[lod_level - 1];
     int seq_num = static_cast<int>(lod.size()) - 1;
     for (int i = 0; i < seq_num; ++i) {
       // Calculate the length of each sequence
@@ -191,14 +206,15 @@ class FirstSeqPoolFunctor {
  public:
   void operator()(const platform::CPUDeviceContext& context,
                   const framework::LoDTensor& input, T pad_value,
-                  framework::Tensor* output) {
+                  framework::LoDTensor* output) {
     // Create pointers to input and output data
     auto* in_data = input.data<T>();
     auto* out_data = output->data<T>();
     // Calculate the size of each item in sequence
     int64_t item_size = input.numel() / input.dims()[0];
-    auto lod = input.lod()[0];
+    auto lod_level = input.lod().size();
+    auto lod = input.lod()[lod_level - 1];
     int seq_num = static_cast<int>(lod.size()) - 1;
     for (int i = 0; i < seq_num; ++i) {
       // Calculate the length of each sequence
@@ -222,12 +238,15 @@ template <typename T>
 class SumSeqPoolGradFunctor {
  public:
   void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& out_grad,
+                  const framework::LoDTensor& out_grad,
                   framework::LoDTensor* in_grad) {
-    auto lod = in_grad->lod()[0];
+    auto lod_level = in_grad->lod().size();
+    auto lod = in_grad->lod()[lod_level - 1];
     int64_t out_w = out_grad.numel() / out_grad.dims()[0];
     int64_t in_w = in_grad->numel() / in_grad->dims()[0];
-    PADDLE_ENFORCE(in_w == out_w);
+    PADDLE_ENFORCE_EQ(
+        in_w, out_w,
+        "The feature size of input@Grad and output@Grad shall be same.");
     const T* out_g_data = out_grad.data<T>();
     T* in_g_data = in_grad->mutable_data<T>(context.GetPlace());
     auto blas = math::GetBlas<platform::CPUDeviceContext, T>(context);
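For SUM pooling the derivative of the sum with respect to every pooled row is 1, so each sequence's out_grad row is copied to all of its input rows (the real code does this through the BLAS handle obtained above). A sketch under the same illustrative assumptions:

```cpp
#include <cstddef>
#include <vector>

// Broadcast each sequence's out_grad row over the rows it pooled.
template <typename T>
void SumSeqPoolGradRef(const std::vector<T>& out_grad, int width,
                       const std::vector<std::size_t>& starts,
                       std::vector<T>* in_grad) {
  in_grad->assign(starts.back() * width, T(0));
  for (std::size_t i = 0; i + 1 < starts.size(); ++i) {
    for (std::size_t r = starts[i]; r < starts[i + 1]; ++r) {
      for (int k = 0; k < width; ++k) {
        (*in_grad)[r * width + k] = out_grad[i * width + k];
      }
    }
  }
}
```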
@@ -250,8 +269,9 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
   /* max pool has index output */
   void operator()(const platform::CPUDeviceContext& context,
                   const std::string pooltype, T pad_value,
-                  const framework::LoDTensor& input, framework::Tensor* output,
-                  bool is_test, framework::Tensor* index = nullptr) {
+                  const framework::LoDTensor& input,
+                  framework::LoDTensor* output, bool is_test,
+                  framework::Tensor* index = nullptr) {
     if (pooltype == "MAX") {
       if (is_test) {
         math::MaxSeqPoolFunctor<T, true> max_pool;
@@ -272,11 +292,13 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
       first_pool(context, input, pad_value, output);
       return;
     }
-    auto lod = input.lod()[0];
+    auto lod_level = input.lod().size();
+    auto lod = input.lod()[lod_level - 1];
     if (pooltype == "SUM") {
       auto place = context.GetPlace();
-      PADDLE_ENFORCE(platform::is_cpu_place(place));
+      PADDLE_ENFORCE_EQ(
+          platform::is_cpu_place(place), true,
+          "Sequence_pool should run on CPU Device when pooltype is SUM");
       const T* src = input.data<T>();
       T* dst = output->mutable_data<T>(place);
       jit::seq_pool_attr_t attr(
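The SUM forward path dispatches to a JIT-generated kernel via jit::seq_pool_attr_t; a naive reference of the computation it stands in for, under the same illustrative assumptions as the earlier sketches, might look like:

```cpp
#include <cstddef>
#include <vector>

// Column-wise sum per sequence; empty sequences get pad_value.
template <typename T>
void SumSeqPoolRef(const std::vector<T>& in, int width,
                   const std::vector<std::size_t>& starts, T pad_value,
                   std::vector<T>* out) {
  std::size_t seq_num = starts.size() - 1;
  out->assign(seq_num * width, T(0));
  for (std::size_t i = 0; i < seq_num; ++i) {
    if (starts[i] == starts[i + 1]) {  // empty sequence -> pad
      for (int k = 0; k < width; ++k) (*out)[i * width + k] = pad_value;
      continue;
    }
    for (std::size_t r = starts[i]; r < starts[i + 1]; ++r) {
      for (int k = 0; k < width; ++k) {
        (*out)[i * width + k] += in[r * width + k];
      }
    }
  }
}
```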
@@ -330,7 +352,8 @@ template <typename T>
 class SequencePoolGradFunctor<platform::CPUDeviceContext, T> {
  public:
   void operator()(const platform::CPUDeviceContext& context,
-                  const std::string pooltype, const framework::Tensor& out_grad,
+                  const std::string pooltype,
+                  const framework::LoDTensor& out_grad,
                   framework::LoDTensor* in_grad,
                   /* max pool has index */
                   const framework::Tensor* index = nullptr) {
@@ -352,7 +375,8 @@ class SequencePoolGradFunctor<platform::CPUDeviceContext, T> {
       return;
     }
-    auto lod = in_grad->lod()[0];
+    auto lod_level = in_grad->lod().size();
+    auto lod = in_grad->lod()[lod_level - 1];
     auto& place = *context.eigen_device();
     for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
       if (lod[i] == lod[i + 1]) continue;

@@ -159,9 +159,11 @@ class SequencePoolFunctor<platform::CUDADeviceContext, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
                   const std::string pooltype, T pad_value,
-                  const framework::LoDTensor& input, framework::Tensor* output,
-                  bool is_test, framework::Tensor* index = nullptr) {
-    auto& lod = input.lod()[0];
+                  const framework::LoDTensor& input,
+                  framework::LoDTensor* output, bool is_test,
+                  framework::Tensor* index = nullptr) {
+    auto lod_level = input.lod().size();
+    auto& lod = input.lod()[lod_level - 1];
     const size_t item_dim = output->numel() / output->dims()[0];
     dim3 threads(1024, 1);
     dim3 grid(lod.size(), 1);
@@ -319,11 +321,13 @@ template <typename T>
 class SequencePoolGradFunctor<platform::CUDADeviceContext, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
-                  const std::string pooltype, const framework::Tensor& out_grad,
+                  const std::string pooltype,
+                  const framework::LoDTensor& out_grad,
                   framework::LoDTensor* in_grad,
                   /* max pool has index */
                   const framework::Tensor* index = nullptr) {
-    auto& lod = in_grad->lod()[0];
+    auto lod_level = in_grad->lod().size();
+    auto& lod = in_grad->lod()[lod_level - 1];
     const size_t item_dim = in_grad->numel() / in_grad->dims()[0];
     dim3 threads(1024, 1);
     dim3 grid(lod.size(), 1);

@@ -28,7 +28,7 @@ class SequencePoolFunctor {
   /* max pool has index output */
   void operator()(const DeviceContext& context, const std::string pooltype,
                   T pad_value, const framework::LoDTensor& input,
-                  framework::Tensor* output, bool is_test = false,
+                  framework::LoDTensor* output, bool is_test = false,
                   framework::Tensor* index = nullptr);
 };
@@ -36,7 +36,7 @@ template <typename DeviceContext, typename T>
 class SequencePoolGradFunctor {
  public:
   void operator()(const DeviceContext& context, const std::string pooltype,
-                  const framework::Tensor& out_grad,
+                  const framework::LoDTensor& out_grad,
                   framework::LoDTensor* in_grad,
                   /* max pool has index */
                   const framework::Tensor* index = nullptr);

@@ -24,14 +24,15 @@ class SequencePoolOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"),
-                   "Input(X) of SequencePoolOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("Out"),
-                   "Output(Out) of SequencePoolOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
+                      "Input(X) of SequencePoolOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
+                      "Output(Out) of SequencePoolOp should not be null.");
     ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
     if (ctx->Attrs().Get<std::string>("pooltype") == "MAX") {
-      PADDLE_ENFORCE(ctx->HasOutput("MaxIndex"),
-                     "Output(MaxIndex) of SequencePoolOp should not be null.");
+      PADDLE_ENFORCE_EQ(
+          ctx->HasOutput("MaxIndex"), true,
+          "Output(MaxIndex) of SequencePoolOp should not be null.");
       ctx->SetOutputDim("MaxIndex", ctx->GetInputDim("X"));
     }
   }
@@ -102,9 +103,10 @@ class SequencePoolGradOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
-                   "Gradient of Out should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("X"), "The input X should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), true,
+                      "Gradient of Out should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
+                      "The input X should not be null.");
     auto og_dims = ctx->GetInputDim(framework::GradVarName("Out"));
     auto x_dims = ctx->GetInputDim("X");
     PADDLE_ENFORCE_EQ(og_dims.size(), x_dims.size(),

@@ -30,19 +30,30 @@ class SequencePoolKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* in = context.Input<LoDTensor>("X");
-    auto* out = context.Output<Tensor>("Out");
+    auto* out = context.Output<LoDTensor>("Out");
     std::string pooltype = context.Attr<std::string>("pooltype");
     T pad_value = static_cast<T>(context.Attr<float>("pad_value"));
     auto dims = in->dims();
     auto lod = in->lod();
+    auto lod_level = lod.size();
     // InferShape by lod
-    PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
+    PADDLE_ENFORCE_GE(lod_level, 1UL,
+                      "The lod level of input shall be 1 at least.");
+    PADDLE_ENFORCE_LE(lod_level, 2UL,
+                      "The lod level of input shall be no more than 2.");
     PADDLE_ENFORCE_GE(
         dims[0],
-        /*batch size = */ static_cast<int64_t>(lod[0].size() - 1),
+        /*batch size = */ static_cast<int64_t>(lod[lod_level - 1].size() - 1),
         "The first dimension of Input(X) must be large than batch size.");
-    dims[0] = lod[0].size() - 1;
+    if (lod_level > 1UL) {
+      PADDLE_ENFORCE_EQ(lod[0][lod[0].size() - 1], lod[1].size() - 1,
+                        "The input lod information is illegal.");
+      framework::LoD out_lod;
+      out_lod.push_back(lod[0]);
+      out->set_lod(out_lod);
+    }
+    dims[0] = lod[lod_level - 1].size() - 1;
     out->Resize({dims});
     out->mutable_data<T>(context.GetPlace());
     Tensor* index = nullptr;
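This hunk carries the core of the feature: with a 2-level input, pooling collapses the finest level, the output keeps level 0 as its own LoD (so the result is still a 1-level sequence batch), and the two levels must agree on the number of sequences. A standalone sketch of those relationships, reusing the made-up offsets from the first sketch:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<std::vector<std::size_t>> lod = {{0, 2, 3}, {0, 2, 5, 9}};
  std::size_t lod_level = lod.size();
  assert(lod_level >= 1 && lod_level <= 2);
  if (lod_level > 1) {
    // The "input lod information is illegal" check: level 0 must end
    // exactly at the number of level-1 sequences.
    assert(lod[0].back() == lod[1].size() - 1);
  }
  // One output row per finest-level sequence (3 here) ...
  std::size_t out_rows = lod[lod_level - 1].size() - 1;
  // ... and the output inherits level 0 as a 1-level LoD.
  std::vector<std::vector<std::size_t>> out_lod = {lod[0]};
  assert(out_rows == out_lod[0].back());  // 3 rows, grouped {2, 1}
  return 0;
}
```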
@@ -68,7 +79,7 @@ template <typename DeviceContext, typename T>
 class SequencePoolGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* out_g = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
     auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
     std::string pooltype = context.Attr<std::string>("pooltype");
     const Tensor* index = nullptr;
