Check and correct the output's lod_level in DynamicRNN related operators (#19144)

* Refine the InferShape of the ReadFromArray and WriteToArray ops, and add a comment to explain why ShareLoD is not called at runtime.
test=develop

* Add comment for ReorderLoDTensorByRank op.

* Add comment for lod_tensor_to_tensor_array op to explain why only call DecreaseLoDLevel for compile time.
test=develop

* ShrinkRNNMemory op should call ShareLoD at compile time.
test=develop

* Add the implementation of IncreaseLoDLevel and add the compile-time check of lod_level in InferShape of sequence_pool.
test=develop

* Refine the unittest of DynamicRNN.
test=develop

* Change PADDLE_ENFORCE to PADDLE_ENFORCE_NE.
test=develop
Yiqun Liu 6 years ago committed by GitHub
parent b5f3be8330
commit 6fcfd32e6c

@@ -86,7 +86,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
if (in_var->GetType() != proto::VarType::LOD_TENSOR &&
in_var->GetType() != proto::VarType::LOD_TENSOR_ARRAY) {
VLOG(3) << "input " << in << " is not LodTensor or LodTensorArray.";
VLOG(3) << "input " << in << " is not LoDTensor or LoDTensorArray.";
return;
}
out_var->SetLoDLevel(in_var->GetLoDLevel());
@@ -94,6 +94,8 @@ class CompileTimeInferShapeContext : public InferShapeContext {
void DecreaseLoDLevel(const std::string &in, const std::string &out,
size_t i = 0, size_t j = 0) const override {
// When in is a LoDTensor and out is a LoDTensorArray, the lod_level may
// need to be decreased.
PADDLE_ENFORCE_LT(i, Inputs(in).size());
PADDLE_ENFORCE_LT(j, Outputs(out).size());
PADDLE_ENFORCE(Inputs(in)[i] != framework::kEmptyVarName,
@@ -102,17 +104,35 @@ class CompileTimeInferShapeContext : public InferShapeContext {
"The %s[%d] is @EMPTY@", out, j);
auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
PADDLE_ENFORCE(out_var->GetType() == proto::VarType::LOD_TENSOR_ARRAY ||
out_var->GetType() == proto::VarType::LOD_TENSOR,
"The input %s should be LodTensorArray or LodTensor.",
out_var->Name());
PADDLE_ENFORCE(in_var->GetType() == proto::VarType::LOD_TENSOR,
"The input %s should be LodTensor.", in_var->Name());
PADDLE_ENFORCE_EQ(in_var->GetType(), proto::VarType::LOD_TENSOR,
"The input %s should be LoDTensor.", in_var->Name());
PADDLE_ENFORCE_EQ(out_var->GetType(), proto::VarType::LOD_TENSOR_ARRAY,
"The output %s should be LoDTensorArray.",
out_var->Name());
if (in_var->GetLoDLevel() > 0) {
out_var->SetLoDLevel(in_var->GetLoDLevel() - 1);
}
}
void IncreaseLoDLevel(const std::string &in, const std::string &out,
size_t i = 0, size_t j = 0) const override {
// When in is a LoDTensorArray and out is a LoDTensor, the lod_level may
// need to be increased.
PADDLE_ENFORCE_LT(i, Inputs(in).size());
PADDLE_ENFORCE_LT(j, Outputs(out).size());
PADDLE_ENFORCE_NE(Inputs(in)[i], framework::kEmptyVarName,
"The %s[%d] is @EMPTY@", in, i);
PADDLE_ENFORCE_NE(Outputs(out)[j], framework::kEmptyVarName,
"The %s[%d] is @EMPTY@", out, j);
auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
PADDLE_ENFORCE_EQ(in_var->GetType(), proto::VarType::LOD_TENSOR_ARRAY,
"The input %s should be LoDTensorArray.", in_var->Name());
PADDLE_ENFORCE_EQ(out_var->GetType(), proto::VarType::LOD_TENSOR,
"The output %s should be LoDTensor.", out_var->Name());
out_var->SetLoDLevel(in_var->GetLoDLevel() + 1);
}
std::vector<InferShapeVarPtr> GetInputVarPtrs(
const std::string &name) override {
const std::vector<std::string> arg_names = Inputs(name);
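
For orientation, here is a minimal standalone sketch (plain C++, not Paddle code; VarDescModel is an invented stand-in for framework::VarDesc) of the compile-time bookkeeping the two methods above perform: splitting a LoDTensor into a LoDTensorArray lowers the static lod_level by one, merging raises it by one, and the actual lod offsets are left to the runtime kernels.

    // Standalone sketch of the lod_level counter arithmetic, not Paddle code.
    #include <cassert>

    struct VarDescModel {
      int lod_level = 0;  // hypothetical stand-in for framework::VarDesc
    };

    // Mirrors CompileTimeInferShapeContext::DecreaseLoDLevel: only the static
    // level counter moves; the real lod offsets are computed by the kernel.
    void DecreaseLoDLevel(const VarDescModel& in, VarDescModel* out) {
      if (in.lod_level > 0) out->lod_level = in.lod_level - 1;
    }

    void IncreaseLoDLevel(const VarDescModel& in, VarDescModel* out) {
      out->lod_level = in.lod_level + 1;
    }

    int main() {
      VarDescModel x{1}, arr, y;
      DecreaseLoDLevel(x, &arr);  // lod_tensor_to_array: level 1 -> 0
      IncreaseLoDLevel(arr, &y);  // array_to_lod_tensor: level 0 -> 1
      assert(arr.lod_level == 0 && y.lod_level == 1);
      return 0;
    }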

@@ -657,7 +657,18 @@ class RuntimeInferShapeContext : public InferShapeContext {
void DecreaseLoDLevel(const std::string& in, const std::string& out,
size_t i = 0, size_t j = 0) const override {
PADDLE_THROW("DecreaseLoDLevel is only used in compile time.");
PADDLE_THROW(
"DecreaseLoDLevel is only used at compile time. The calculation of the "
"output's actual lod differs among operators, so it should be set in "
"the runtime kernel.");
}
void IncreaseLoDLevel(const std::string& in, const std::string& out,
size_t i = 0, size_t j = 0) const override {
PADDLE_THROW(
"IncreaseLoDLevel is only used at compile time. The calculation of the "
"output's actual lod differs among operators, so it should be set in "
"the runtime kernel.");
}
bool IsRuntime() const override { return true; }

@@ -68,6 +68,9 @@ class InferShapeContext {
virtual void DecreaseLoDLevel(const std::string &in, const std::string &out,
size_t i = 0, size_t j = 0) const = 0;
virtual void IncreaseLoDLevel(const std::string &in, const std::string &out,
size_t i = 0, size_t j = 0) const = 0;
virtual bool IsRuntime() const = 0;
virtual std::vector<InferShapeVarPtr> GetInputVarPtrs(

@@ -192,7 +192,21 @@ class ArrayToLoDTensorInferShape : public framework::InferShapeBase {
"ArrayToLoDTensorOp must have input X.");
PADDLE_ENFORCE(context->HasInput("RankTable"),
"ArrayToLoDTensorOp must have input RankTable.");
// For compile-time, the first dim of input X and output Out should be -1.
// For runtime, the first dim of output Out should be the sum of all
// elements' first dims in input X. The output's dims will be re-computed
// in the detail kernel implementation.
context->SetOutputDim("Out", context->GetInputDim("X"));
// The output LoDTensor's lod_level should be input X's lod_level + 1.
// For compile-time, we call IncreaseLoDLevel to set output's lod_level.
// For runtime, output LoDTensor's lod is determined by input X's lod and
// the level specified by input RankTable.
// We cannot get X's detail lod and RankTable's level in this function, so
// leave this work to the detail kernel implementation.
if (!context->IsRuntime()) {
context->IncreaseLoDLevel("X", /*->*/ "Out");
}
}
};
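
As a rough illustration of why IncreaseLoDLevel is the right compile-time counterpart of this op, the sketch below (plain C++, not Paddle code; MergeOuterLod is an invented name) builds the new outer lod level from the elements' first dims. The real kernel additionally restores the original sequence order from RankTable, which is omitted here.

    // Standalone sketch: merging a tensor array back into one LoDTensor
    // appends a new outer lod level whose offsets are the running sum of the
    // elements' first dims. Not Paddle code.
    #include <cstddef>
    #include <vector>

    std::vector<size_t> MergeOuterLod(const std::vector<size_t>& elem_rows) {
      std::vector<size_t> offsets{0};  // lod offsets for the new outer level
      for (size_t rows : elem_rows) offsets.push_back(offsets.back() + rows);
      return offsets;  // e.g. element rows {3, 2, 4} -> {0, 3, 5, 9}
    }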

@@ -88,8 +88,21 @@ class WriteToArrayInferShape : public framework::InferShapeBase {
if (!context->HasInput("X")) {
return;
}
PADDLE_ENFORCE(context->HasOutput("Out"), NotHasOutError());
context->SetOutputDim("Out", context->GetInputDim("X"));
// At compile time, we need to:
// - for ReadFromArray, share tensor_array X's lod_level to Out
// - for WriteToArray, share X's lod_level to tensor_array Out
// At runtime, we need to:
// - for ReadFromArray, share X[I]'s lod to Out
// - for WriteToArray, share X's lod to Out[I]
// but we cannot get I's value here, so leave this work to the detail
// kernel implementation.
if (!context->IsRuntime()) {
context->ShareLoD("X", /*->*/ "Out");
}
}
protected:
@@ -166,19 +179,6 @@ $$T = A[i]$$
};
class ReadFromArrayInferShape : public WriteToArrayInferShape {
public:
void operator()(framework::InferShapeContext *context) const override {
WriteToArrayInferShape::operator()(context);
if (!context->HasInput("X")) {
return;
}
// FIXME: just for compile time.
if (!context->IsRuntime()) {
context->ShareLoD("X", /*->*/ "Out");
}
}
protected:
const char *NotHasXError() const override {
return "The input array X must be set";

@@ -106,9 +106,10 @@ class LoDTensorToArrayOp : public framework::OperatorBase {
auto max_seq_len = items[0].length;
auto rank_level = rank_table.level();
PADDLE_ENFORCE_LT(rank_level, x.lod().size(),
"Input should be a LOD tensor, and size is at least %d",
rank_level + 1);
PADDLE_ENFORCE_LT(
rank_level, x.lod().size(),
"Input should be a LoDTensor, and its lod_level should be at least %d",
rank_level + 1);
out.resize(max_seq_len);
std::vector<std::vector<CopyRange>> copy_ranges(max_seq_len);
@@ -167,10 +168,21 @@ class LoDTensorToArrayOp : public framework::OperatorBase {
class LoDTensorToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "");
AddInput("RankTable", "");
AddOutput("Out", "");
AddComment("");
AddInput("X",
"(LoDTensor), the input lod tensor is a minibatch of sequences, "
"and will be split to a tensor_array according to "
"Input(RankTable).");
AddInput("RankTable", "(LoDRankTable), the rank table.");
AddOutput("Out",
"(LoDTensorArray), the result tensor_array, which is actually a "
"std::vector<LoDTensor>.");
AddComment(R"DOC(LoDTensorToArray operator.
Input(X) is a minibatch of sequences. Input(RankTable) stores the order of the input sequences.
The lod_tensor_to_array operator will spilt the input sequences to a tensor_array, with each
element stores one sequence, according to the input rank_table.
NOTE: this operator is an internal component of DynamicRNN, and cannot be called by users.
)DOC");
}
};
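
To make the split concrete, here is a small standalone sketch (plain C++, not Paddle code; RowsPerStep is an invented helper) of the timestep layout the kernel produces when the sequences are ordered longest-first by the rank table: out[t] gathers the t-th step of every sequence that is still running.

    // Standalone sketch of how many rows each tensor_array element holds.
    #include <cstddef>
    #include <vector>

    // "lengths" must be sorted in decreasing order (the rank table's order).
    std::vector<size_t> RowsPerStep(const std::vector<size_t>& lengths) {
      std::vector<size_t> rows(lengths.empty() ? 0 : lengths.front(), 0);
      for (size_t t = 0; t < rows.size(); ++t) {
        size_t alive = 0;
        while (alive < lengths.size() && lengths[alive] > t) ++alive;
        rows[t] = alive;  // lengths {4, 3, 1} -> rows {3, 2, 2, 1}
      }
      return rows;
    }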
@@ -187,10 +199,18 @@ class LoDTensorToArrayInferShape : public framework::InferShapeBase {
"Output(Out) of LoDTensorToArrayOp should not be null.");
auto x_dim = context->GetInputDim("X");
// The first dim of each LoDTensor in Output can only be set at run-time.
// We still have to Resize each LoDTensor in Output.
// For compile-time, the first dim of input X and output Out should be -1.
// For runtime, the first dim of input X should be the sum of all elements'
// first dims in output Out. The output's dims will be re-computed in the
// detail kernel implementation.
context->SetOutputDim("Out", x_dim);
// The lod level should be passed to out in compile time.
// The output LoDTensor's lod_level should be input X's lod_level - 1.
// For compile time, we call DecreaseLoDLevel to set output's lod_level.
// For runtime, output LoDTensor's lod is determined by input X's lod and
// the level specified by input RankTable.
// We cannot get X's detail lod and RankTable's level in this function, so
// leave this work to the detail kernel implementation.
if (!context->IsRuntime()) {
context->DecreaseLoDLevel("X", /*->*/ "Out");
}

@@ -202,6 +202,9 @@ class IdentityInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *context) const override {
context->SetOutputDim("Out", context->GetInputDim("X"));
// X's lod and Out's lod are different at runtime, so there is no need to
// call ShareLoD at runtime. The setting of Out's lod is done in the
// detail kernel implementation.
if (!context->IsRuntime()) {
context->ShareLoD("X", /*->*/ "Out");
}
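
A minimal standalone sketch (plain C++, not Paddle code; ReorderLod is an invented helper) of why the output lod cannot simply be shared at runtime: the sequences are permuted, so the offsets must be rebuilt from the permuted lengths rather than copied from X.

    // Standalone sketch of rebuilding level-0 lod offsets after reordering.
    #include <cstddef>
    #include <vector>

    std::vector<size_t> ReorderLod(const std::vector<size_t>& lod0,
                                   const std::vector<size_t>& new_order) {
      std::vector<size_t> out{0};
      for (size_t idx : new_order) {
        out.push_back(out.back() + (lod0[idx + 1] - lod0[idx]));
      }
      return out;  // lod {0, 2, 5, 9}, order {2, 0, 1} -> {0, 4, 6, 9}
    }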

@@ -28,6 +28,16 @@ class SequencePoolOp : public framework::OperatorWithKernel {
"Input(X) of SequencePoolOp should not be null.");
PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
"Output(Out) of SequencePoolOp should not be null.");
if (!ctx->IsRuntime()) {
// Check the lod_level for compile-time.
framework::VarDesc* x_desc =
boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("X")[0]);
PADDLE_ENFORCE_GT(
x_desc->GetLoDLevel(), 0,
"The LoD level Input(X) of sequence_pool should be larger than 0");
}
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
if (ctx->Attrs().Get<std::string>("pooltype") == "MAX") {
PADDLE_ENFORCE_EQ(
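
The compile-time check above exists because the pooling itself is driven by the sequence boundaries in the lod; a minimal standalone sketch (plain C++, not Paddle code; SumPool is an invented helper) of sum pooling over level-0 lod offsets shows there is nothing to pool over without a lod.

    // Standalone sketch: one pooled value per sequence, using lod offsets.
    #include <cstddef>
    #include <vector>

    std::vector<float> SumPool(const std::vector<float>& x,
                               const std::vector<size_t>& lod0) {
      std::vector<float> out;  // one value per sequence
      for (size_t i = 0; i + 1 < lod0.size(); ++i) {
        float sum = 0.f;
        for (size_t r = lod0[i]; r < lod0[i + 1]; ++r) sum += x[r];
        out.push_back(sum);
      }
      return out;  // x of 9 rows with lod {0, 3, 5, 9} -> 3 pooled values
    }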

@@ -100,8 +100,10 @@ class ShrinkRNNMemoryInferShape : public framework::InferShapeBase {
PADDLE_ENFORCE(context->HasInput("I"));
PADDLE_ENFORCE(context->HasInput("RankTable"));
context->SetOutputDim("Out", context->GetInputDim("X"));
// For runtime, output's lod is computed according to input's lod, but with
// the finished sequences removed. It is set in the detail kernel
// implementation.
if (!context->IsRuntime()) {
context->DecreaseLoDLevel("X", /*->*/ "Out");
context->ShareLoD("X", /*->*/ "Out");
}
}
};
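
Assuming the sequences are ordered longest-first by the rank table, here is a minimal standalone sketch (plain C++, not Paddle code; ShrinkLod is an invented helper) of the runtime lod handling described in the comment above: keep only the offsets of the sequences that are still running, so the output lod is a truncated copy of the input lod.

    // Standalone sketch of dropping finished sequences from level-0 lod.
    #include <cstddef>
    #include <vector>

    std::vector<size_t> ShrinkLod(const std::vector<size_t>& lod0,
                                  size_t num_still_running) {
      // Keep offsets for the first num_still_running sequences only.
      return std::vector<size_t>(
          lod0.begin(), lod0.begin() + num_still_running + 1);
      // e.g. lod {0, 4, 7, 8} with 2 running sequences -> {0, 4, 7}
    }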
