Update doc of layers.py

7 years ago · ac29d00cff
parent de2bc5da28 4adc8a7aa1
commit ac29d00cff
195 changed files with 3068 additions and 312 deletions
--- a/.gitignore
+++ b/.gitignore
@ -21,7 +21,7 @@ third_party/
 cmake-build-*
 # generated while compiling
-python/paddle/v2/framework/core.so
+python/paddle/v2/fluid/core.so
 paddle/pybind/pybind.h
 CMakeFiles
 cmake_install.cmake
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@ -377,6 +377,12 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
  return grad_op_descs;
 }
 // Forward declaration. MakeBlockBackward (below) calls this helper for ops
 // that carry a sub-block, while the helper itself calls MakeBlockBackward, so
 // one of the two has to be declared ahead of its definition.
 static BlockDescBind* CreateStepBlock(
    ProgramDescBind& program_desc,
    std::unordered_set<std::string>* no_grad_vars,
    std::unordered_map<std::string, std::string>* grad_to_var,
    int step_block_idx);
 std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
    ProgramDescBind& program_desc, int block_idx,
    std::unordered_set<std::string>* no_grad_vars,
@ -392,13 +398,13 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
    if ((*it)->Type() == "recurrent") {
      int step_block_idx = (*it)->GetBlockAttr("step_block");
-      auto backward_block_op_descs = MakeBlockBackward(
+      BlockDescBind* backward_block = CreateStepBlock(
-          program_desc, step_block_idx, no_grad_vars, grad_to_var);
+          program_desc, no_grad_vars, grad_to_var, step_block_idx);
      op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
    } else if ((*it)->Type() == "conditional_block") {
      BlockDescBind* backward_block =
-          program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx));
+          CreateStepBlock(program_desc, no_grad_vars, grad_to_var,
-      for (auto& ptr : backward_block_op_descs) {
+                          (*it)->GetBlockAttr("block"));
        backward_block->AppendAllocatedOp(std::move(ptr));
      }
      op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
    } else {
      op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var);
@ -449,6 +455,21 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
  return backward_descs;
 }
 // Builds the backward block that corresponds to the sub-block at
 // `step_block_idx`.
 //
 // The gradient op descs are produced first by MakeBlockBackward. A new block
 // is then appended to `program_desc` (AppendBlock is given the forward step
 // block) and every generated op desc is moved into that new block.
 //
 // Returns a pointer to the newly appended block.
 static BlockDescBind* CreateStepBlock(
    ProgramDescBind& program_desc,
    std::unordered_set<std::string>* no_grad_vars,
    std::unordered_map<std::string, std::string>* grad_to_var,
    int step_block_idx) {
  auto backward_block_op_descs = MakeBlockBackward(program_desc, step_block_idx,
                                                   no_grad_vars, grad_to_var);
  BlockDescBind* backward_block =
      program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx));
  for (auto& ptr : backward_block_op_descs) {
    // Spell out std::move instead of depending on argument-dependent lookup
    // to resolve an unqualified `move`.
    backward_block->AppendAllocatedOp(std::move(ptr));
  }
  return backward_block;
 }
 ParamGradInfoMap AppendBackward(
    ProgramDescBind& program_desc, const VarDescBind& target,
    const std::unordered_set<std::string>& no_grad_vars) {
--- a/paddle/framework/var_type.h
+++ b/paddle/framework/var_type.h
@ -27,10 +27,32 @@ inline VarDesc::VarType ToVarType(std::type_index type) {
    return VarDesc_VarType_LOD_RANK_TABLE;
  } else if (type.hash_code() == typeid(LoDTensorArray).hash_code()) {
    return VarDesc_VarType_LOD_TENSOR_ARRAY;
  } else if (type.hash_code() == typeid(SelectedRows).hash_code()) {
    return VarDesc_VarType_SELECTED_ROWS;
  } else {
    PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
  }
 }
 // Invokes `visitor` with the value stored in `var`, typed according to the
 // variable's runtime type as reported by ToVarType. Throws for any type that
 // is not one of the four cases handled below.
 template <typename Visitor>
 inline void VisitVarType(const Variable& var, Visitor visitor) {
  const auto var_type = ToVarType(var.Type());
  switch (var_type) {
    case VarDesc_VarType_LOD_TENSOR:
      visitor(var.Get<framework::LoDTensor>());
      break;
    case VarDesc_VarType_LOD_RANK_TABLE:
      visitor(var.Get<LoDRankTable>());
      break;
    case VarDesc_VarType_LOD_TENSOR_ARRAY:
      visitor(var.Get<LoDTensorArray>());
      break;
    case VarDesc_VarType_SELECTED_ROWS:
      visitor(var.Get<SelectedRows>());
      break;
    default:
      PADDLE_THROW("Not supported visit type, %d", var_type);
  }
 }
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp
@ -54,7 +54,6 @@ void MKLDNNAddtoLayer::reshape(
  ow = iw;
  reshapeOutput(oh, ow);
  resizeOutput(bs, oc * oh * ow);
  printSizeInfo();
 }
 void MKLDNNAddtoLayer::resetFwd(std::vector<primitive>& pipeline,
--- a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
@ -125,7 +125,6 @@ void MKLDNNBatchNormLayer::reshape(
      << "Input channel can not be changed";
  reshapeOutput(oh, ow);
  resizeOutput(bs, oc * oh * ow);
  printSizeInfo();
 }
 void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,
--- a/paddle/gserver/layers/MKLDNNConvLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp
@ -102,8 +102,6 @@ void MKLDNNConvLayer::reshape(
  reshapeOutput(oh, ow);
  resizeOutput(bs, oc * oh * ow);
  printSizeInfo();
 }
 void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
--- a/paddle/gserver/layers/MKLDNNConvLayer.h
+++ b/paddle/gserver/layers/MKLDNNConvLayer.h
@ -92,7 +92,7 @@ public:
  void printSizeInfo() override {
    MKLDNNLayer::printSizeInfo();
    VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_
-                       << ": ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_
+                       << ", ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_
                       << ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_;
  }
--- a/paddle/gserver/layers/MKLDNNFcLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp
@ -84,8 +84,6 @@ void MKLDNNFcLayer::reshape(
  reshapeOutput(oh, ow);
  resizeOutput(bs, oc);
  printSizeInfo();
 }
 void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
--- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp
@ -71,8 +71,6 @@ void MKLDNNPoolLayer::reshape(
  reshapeOutput(oh, ow);
  resizeOutput(bs, oc * oh * ow);
  printSizeInfo();
 }
 void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
--- a/paddle/gserver/layers/ROIPoolLayer.cpp
+++ b/paddle/gserver/layers/ROIPoolLayer.cpp
@ -98,7 +98,7 @@ void ROIPoolLayer::forward(PassType passType) {
    size_t roiStartH = round(bottomROIs[2] * spatialScale_);
    size_t roiEndW = round(bottomROIs[3] * spatialScale_);
    size_t roiEndH = round(bottomROIs[4] * spatialScale_);
-    CHECK_GE(roiBatchIdx, 0);
+    CHECK_GE(roiBatchIdx, 0UL);
    CHECK_LT(roiBatchIdx, batchSize);
    size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL);
    size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL);
--- a/paddle/gserver/tests/test_MKLDNN.cpp
+++ b/paddle/gserver/tests/test_MKLDNN.cpp
@ -297,7 +297,7 @@ static void getAddtoConfig(TestConfig& cfg,
 }
 void testAddtoLayer(const testImageDesc& pm, const size_t nInputs) {
-  CHECK_GE(nInputs, 1);
+  CHECK_GE(nInputs, 1UL);
  TestConfig dnnConfig;
  getAddtoConfig(dnnConfig, pm, nInputs);
  dnnConfig.layerConfig.set_type("mkldnn_addto");
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@ -214,6 +214,7 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
 cc_test(gather_test SRCS gather_test.cc DEPS tensor)
 cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
 cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
 cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
 cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)
 cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc
        rnn/recurrent_op_utils.cc
--- a/paddle/operators/assign_op.cc
+++ b/paddle/operators/assign_op.cc
@ -0,0 +1,138 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
   http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
 #include "paddle/framework/data_type.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/var_type.h"
 namespace paddle {
 namespace operators {
 // Visitor used by the assign op: copies the visited input value into the
 // output variable `out`. Overloads exist for LoDTensor, LoDTensorArray and
 // SelectedRows; every other type falls into the throwing template overload.
 class AssignFunctor {
 public:
  AssignFunctor(framework::Variable *out,
                const platform::DeviceContext &dev_ctx)
      : out_(out), dev_ctx_(dev_ctx) {}
  // Single tensor: copy data and LoD into the output tensor.
  void operator()(const framework::LoDTensor &lod_tensor) const {
    copy_tensor(lod_tensor, out_->GetMutable<framework::LoDTensor>());
  }
  // Tensor array: resize the output to match, then copy element by element.
  void operator()(const framework::LoDTensorArray &array) const {
    auto *dst_array = out_->GetMutable<framework::LoDTensorArray>();
    const size_t count = array.size();
    dst_array->resize(count);
    for (size_t idx = 0; idx != count; ++idx) {
      copy_tensor(array[idx], &(*dst_array)[idx]);
    }
  }
  // SelectedRows: copy the row index, the height and the value tensor.
  void operator()(const framework::SelectedRows &rows) const {
    auto *dst_rows = out_->GetMutable<framework::SelectedRows>();
    dst_rows->set_rows(rows.rows());
    dst_rows->set_height(rows.height());
    const auto &src_value = rows.value();
    dst_rows->mutable_value()->CopyFrom(src_value, src_value.place(),
                                        dev_ctx_);
  }
  // Fallback for every type without a dedicated overload.
  template <typename T>
  void operator()(const T &) const {
    PADDLE_THROW("Not support type for assign op %s", typeid(T).name());
  }
 private:
  // Copies tensor contents (kept on the source's place) and then the LoD.
  void copy_tensor(const framework::LoDTensor &lod_tensor,
                   framework::LoDTensor *out) const {
    out->CopyFrom(lod_tensor, lod_tensor.place(), dev_ctx_);
    out->set_lod(lod_tensor.lod());
  }
  framework::Variable *out_;
  const platform::DeviceContext &dev_ctx_;
 };
 // Operator that copies input X to output Out. It is a no-op when X cannot be
 // found in the scope.
 class AssignOp : public framework::OperatorBase {
 public:
  AssignOp(const std::string &type, const framework::VariableNameMap &inputs,
           const framework::VariableNameMap &outputs,
           const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    auto *x = scope.FindVar(Input("X"));
    if (x != nullptr) {
      // The output is only required once there is something to copy.
      auto *out = scope.FindVar(Output("Out"));
      PADDLE_ENFORCE(
          out != nullptr,
          "The Output(Out) should not be null if the Input(X) is set.");
      AssignFunctor functor(out, dev_ctx);
      framework::VisitVarType(*x, functor);
    }
  }
 };
 // Declares the interface of the assign op: a dispensable input "X", an
 // output "Out", and the operator comment shown in generated documentation.
 class AssignOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  AssignOpProtoMaker(framework::OpProto *proto,
                     framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    // X is dispensable, so the op may be created without it.
    AddInput("X",
             "(LoDTensor, SelectedRows or LoDTensorArray) The input variable "
             "could be LoDTensor, SelectedRows or LoDTensorArray.")
        .AsDispensable();
    AddOutput("Out",
              "(LoDTensor, SelectedRows or LoDTensorArray) The type of output "
              "is the same as input X.");
    AddComment(R"DOC(Assign Operator
 Out = X,  when type in [LoDTensor/SelectedRows/LoDTensorArray]
 raise error if the type is not listed above.
 )DOC");
  }
 };
 // Shape inference for the assign op: when X is present and is a LoDTensor or
 // SelectedRows, Out takes X's dimensions. Nothing is set in any other case.
 class AssignInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    if (!context->HasInput("X")) {
      return;
    }
    const auto type = context->GetInputsVarType("X")[0];
    const bool is_selected_rows =
        type == framework::VarDesc_VarType_SELECTED_ROWS;
    const bool is_lod_tensor = type == framework::VarDesc_VarType_LOD_TENSOR;
    if (is_selected_rows || is_lod_tensor) {
      context->SetOutputDim("Out", context->GetInputDim("X"));
    }
  }
 };
 // Gradient maker for the assign op. The gradient is itself an "assign" op
 // that copies Out's gradient into X's gradient.
 class AssignGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
 protected:
  std::unique_ptr<framework::OpDescBind> Apply() const override {
    // Own the desc from the moment it is created so it is released even if
    // one of the setters below throws.
    std::unique_ptr<framework::OpDescBind> op(new framework::OpDescBind());
    op->SetType("assign");
    op->SetInput("X", OutputGrad("Out"));
    op->SetOutput("Out", InputGrad("X"));
    return op;
  }
 };
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 // Register the "assign" operator together with its gradient maker, shape
 // inference and proto maker.
 REGISTER_OPERATOR(assign, ops::AssignOp, ops::AssignGradMaker,
                  ops::AssignInferShape, ops::AssignOpProtoMaker);
--- a/paddle/operators/beam_search_decode_op.cc
+++ b/paddle/operators/beam_search_decode_op.cc
@ -0,0 +1,111 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/operators/beam_search_decode_op.h"
 namespace paddle {
 namespace operators {
 // Operator that packs the per-step outputs of beam search ("Ids" and
 // "Scores", both LoDTensorArray) into two LoDTensors, "SentenceIds" and
 // "SentenceScores", by delegating to BeamSearchDecoder<float>::PackAllSteps.
 class BeamSearchDecodeOp : public framework::OperatorBase {
 public:
  BeamSearchDecodeOp(const std::string& type,
                     const framework::VariableNameMap& inputs,
                     const framework::VariableNameMap& outputs,
                     const framework::AttributeMap& attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}
  void Run(const framework::Scope& scope,
           const platform::DeviceContext& dev_ctx) const override {
    framework::ExecutionContext ctx(*this, scope, dev_ctx);
    const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids");
    const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores");
    // One array element per decoding step.
    const size_t step_num = ids->size();
    PADDLE_ENFORCE_GT(step_num, 0UL,
                      "beam search steps should be larger than 0");
    // The first LoD level of step 0 holds (source count + 1) offsets.
    // NOTE(review): if that level is empty, `size() - 1` wraps around as an
    // unsigned value and the check below still passes — confirm callers never
    // produce an empty level.
    const size_t source_num = ids->at(0).lod().at(0).size() - 1;
    PADDLE_ENFORCE_GT(source_num, 0UL, "source num should be larger than 0");
    // Every step must carry exactly two LoD levels.
    for (size_t i = 0; i < step_num; ++i) {
      PADDLE_ENFORCE_EQ(ids->at(i).lod().size(), 2UL,
                        "Level of LodTensor should be 2");
    }
    // NOTE(review): `scores` is not validated here; presumably PackAllSteps
    // checks it against `ids` — confirm.
    // prepare output
    LoDTensor* sentenceIds = ctx.Output<LoDTensor>("SentenceIds");
    LoDTensor* sentenceScores = ctx.Output<LoDTensor>("SentenceScores");
    BeamSearchDecoder<float> beam_search_decoder;
    beam_search_decoder.PackAllSteps(*ids, *scores, sentenceIds,
                                     sentenceScores);
  }
 };
 // Declares the inputs ("Ids", "Scores"), outputs ("SentenceIds",
 // "SentenceScores") and the documentation comment of beam_search_decode.
 class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  BeamSearchDecodeOpProtoMaker(framework::OpProto* proto,
                               framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    // "Ids" carries word ids; its description previously duplicated the
    // "Scores" text.
    AddInput("Ids",
             "(LodTensorArray)"
             "ids of the candidate words in each step");
    AddInput("Scores",
             "(LodTensorArray)"
             "score of the candidate words in each step");
    AddOutput("SentenceIds",
              "(LodTensor)"
              "All possible result sentences of word ids");
    AddOutput("SentenceScores",
              "(LodTensor)"
              "All possible result sentences of word scores");
    AddComment(R"DOC(
 Pack the result of Beam search op into SentenceIds and SentenceScores.
 )DOC");
  }
 };
 // Shape inference for beam_search_decode. It only verifies that both inputs
 // and both outputs are present; no output dimensions are set here.
 class BeamSearchDecodeInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext* context) const override {
    PADDLE_ENFORCE(context->HasInput("Ids"),
                   "BeamSearchDecodeOp must has input Ids");
    PADDLE_ENFORCE(context->HasInput("Scores"),
                   "BeamSearchDecodeOp must has input Scores");
    PADDLE_ENFORCE(context->HasOutput("SentenceIds"),
                   "BeamSearchDecodeOp must has output SentenceIds");
    PADDLE_ENFORCE(context->HasOutput("SentenceScores"),
                   "BeamSearchDecodeOp must has output SentenceScores");
  }
 };
 // Variable-type inference for beam_search_decode: every variable bound to
 // "SentenceIds" or "SentenceScores" is marked as a LOD_TENSOR.
 class BeamSearchDecodeInferVarType : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDescBind& op_desc,
                  framework::BlockDescBind* block) const override {
    auto mark_as_lod_tensor = [&](const std::string& slot) {
      for (auto& var_name : op_desc.Output(slot)) {
        block->Var(var_name)->SetType(framework::VarDesc::LOD_TENSOR);
      }
    };
    mark_as_lod_tensor("SentenceIds");
    mark_as_lod_tensor("SentenceScores");
  }
 };
 }  // namespace operators
 }  // namespace paddle
 // Register beam_search_decode with its proto maker, shape inference and
 // variable-type inference. EmptyGradOpMaker is passed as the gradient maker.
 REGISTER_OPERATOR(beam_search_decode, paddle::operators::BeamSearchDecodeOp,
                  paddle::operators::BeamSearchDecodeOpProtoMaker,
                  paddle::operators::BeamSearchDecodeInferShape,
                  paddle::operators::BeamSearchDecodeInferVarType,
                  paddle::framework::EmptyGradOpMaker);
--- a/paddle/operators/beam_search_decode_op.h
+++ b/paddle/operators/beam_search_decode_op.h
--- a/paddle/operators/beam_search_decode_op_test.cc
+++ b/paddle/operators/beam_search_decode_op_test.cc
@ -0,0 +1,221 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/operators/beam_search_decode_op.h"
 #include "gtest/gtest.h"
 // Short aliases for the framework and operator types exercised by the tests
 // in this file.
 using CPUPlace = paddle::platform::CPUPlace;
 using LoD = paddle::framework::LoD;
 using LoDTensor = paddle::framework::LoDTensor;
 using LoDTensorArray = paddle::framework::LoDTensorArray;
 // Aliases for the class templates declared by the beam search decode op.
 template <typename T>
 using BeamNode = paddle::operators::BeamNode<T>;
 template <typename T>
 using BeamSearchDecoder = paddle::operators::BeamSearchDecoder<T>;
 template <typename T>
 using Sentence = paddle::operators::Sentence<T>;
 template <typename T>
 using BeamNodeVector = paddle::operators::BeamNodeVector<T>;
 template <typename T>
 using SentenceVector = paddle::operators::SentenceVector<T>;
 namespace paddle {
 namespace test {
 // Appends one beam-search step to `ids` and `scores`.
 //
 // Both appended tensors share the two-level LoD {level_0, level_1} and hold
 // `data` element-wise: as int64_t for the id tensor and as float for the
 // score tensor. The LoD levels are checked for consistency with each other
 // and with `data` first.
 void GenerateExample(const std::vector<size_t>& level_0,
                     const std::vector<size_t>& level_1,
                     const std::vector<int>& data, LoDTensorArray* ids,
                     LoDTensorArray* scores) {
  PADDLE_ENFORCE_EQ(level_0.back(), level_1.size() - 1,
                    "source level is used to describe candidate set");
  PADDLE_ENFORCE_EQ(level_1.back(), data.size(),
                    "the lowest level is used to describe data"
                    ", so it's last element should be data length");
  CPUPlace place;
  LoD lod;
  lod.push_back(level_0);
  lod.push_back(level_1);
  const auto length = static_cast<int64_t>(data.size());
  // Id tensor: allocate on the CPU and fill from `data`.
  LoDTensor tensor_id;
  tensor_id.set_lod(lod);
  tensor_id.Resize({length});
  int64_t* id_ptr = tensor_id.mutable_data<int64_t>(place);
  size_t id_pos = 0;
  for (int value : data) {
    id_ptr[id_pos++] = static_cast<int64_t>(value);
  }
  // Score tensor: same layout, values converted to float.
  LoDTensor tensor_score;
  tensor_score.set_lod(lod);
  tensor_score.Resize({length});
  float* score_ptr = tensor_score.mutable_data<float>(place);
  size_t score_pos = 0;
  for (int value : data) {
    score_ptr[score_pos++] = static_cast<float>(value);
  }
  ids->push_back(tensor_id);
  scores->push_back(tensor_score);
 }
 }  // namespace test
 }  // namespace paddle
 // Builds the tree root -> {b1 -> b3, b2} and deletes only the leaves b3 and
 // b2. The test has no assertions; it passes if the deletions complete.
 // NOTE(review): root and b1 are never deleted explicitly here; presumably
 // BeamNode's destructor releases ancestors that are left without children —
 // confirm in beam_search_decode_op.h.
 TEST(BeamSearchDecodeOp, DeleteBeamNode) {
  auto* root = new BeamNode<float>(0, 0);
  auto* b1 = new BeamNode<float>(1, 1);
  auto* b2 = new BeamNode<float>(2, 2);
  auto* b3 = new BeamNode<float>(3, 3);
  b1->AppendTo(root);
  b2->AppendTo(root);
  b3->AppendTo(b1);
  delete b3;
  delete b2;
 }
 // Builds the chain root -> b1 -> end and checks that MakeSentence(end)
 // yields word ids {0, 1, 2} and scores {0, 1, 2}.
 TEST(BeamSearchDecodeOp, MakeSentence) {
  auto* root = new BeamNode<float>(0, 0);
  auto* b1 = new BeamNode<float>(1, 1);
  auto* end = new BeamNode<float>(2, 2);
  b1->AppendTo(root);
  end->AppendTo(b1);
  BeamSearchDecoder<float> helper;
  Sentence<float> sentence = helper.MakeSentence(end);
  // Only the tail node is deleted explicitly.
  // NOTE(review): presumably this also releases b1 and root — confirm.
  delete end;
  std::vector<int64_t> expect_ids = {0, 1, 2};
  ASSERT_EQ(sentence.word_ids, expect_ids);
  std::vector<float> expect_scores = {0, 1, 2};
  ASSERT_EQ(sentence.scores, expect_scores);
 }
 // Calls PackTwoSteps with an empty prefix list (no previous step) on one
 // generated step, then checks that two per-source node vectors come back
 // with 2 and 4 nodes respectively.
 TEST(BeamSearchDecodeOp, PackTwoStepsFistStep) {
  CPUPlace place;
  LoDTensorArray ids;
  LoDTensorArray scores;
  paddle::test::GenerateExample(
      std::vector<size_t>{0, 2, 6}, std::vector<size_t>{0, 1, 2, 3, 4, 5, 6},
      std::vector<int>{1, 2, 3, 4, 5, 6}, &ids, &scores);
  std::vector<BeamNodeVector<float>> beamnode_vector_list;
  // One (initially empty) sentence vector per source.
  std::vector<SentenceVector<float>> sentence_vector_list(
      2, SentenceVector<float>());
  BeamSearchDecoder<float> helper;
  beamnode_vector_list = helper.PackTwoSteps(
      ids[0], scores[0], beamnode_vector_list, &sentence_vector_list);
  ASSERT_EQ(beamnode_vector_list.size(), 2UL);
  ASSERT_EQ(beamnode_vector_list[0].size(), 2UL);
  ASSERT_EQ(beamnode_vector_list[1].size(), 4UL);
 }
 // Calls PackTwoSteps with hand-built prefixes for two sources (three and two
 // prefixes) and one generated step, then checks how many sentences were
 // emitted and how many nodes remain for each source.
 TEST(BeamSearchDecodeOp, PackTwoSteps) {
  CPUPlace place;
  // first source has three prefix
  BeamNodeVector<float> source0_prefixes;
  source0_prefixes.push_back(
      std::unique_ptr<BeamNode<float>>(new BeamNode<float>(1, 1)));
  source0_prefixes.push_back(
      std::unique_ptr<BeamNode<float>>(new BeamNode<float>(0, 0)));
  source0_prefixes.push_back(
      std::unique_ptr<BeamNode<float>>(new BeamNode<float>(3, 3)));
  // second source has two prefix
  BeamNodeVector<float> source1_prefixes;
  source1_prefixes.push_back(
      std::unique_ptr<BeamNode<float>>(new BeamNode<float>(4, 4)));
  source1_prefixes.push_back(
      std::unique_ptr<BeamNode<float>>(new BeamNode<float>(5, 5)));
  std::vector<BeamNodeVector<float>> beamnode_vector_list;
  std::vector<SentenceVector<float>> sentence_vector_list(
      2, SentenceVector<float>());
  beamnode_vector_list.push_back(std::move(source0_prefixes));
  beamnode_vector_list.push_back(std::move(source1_prefixes));
  // generate data for one step
  LoDTensorArray ids;
  LoDTensorArray scores;
  paddle::test::GenerateExample(std::vector<size_t>{0, 3, 5},
                                std::vector<size_t>{0, 1, 1, 3, 4, 5},
                                std::vector<int>{0, 1, 2, 3, 4}, &ids, &scores);
  BeamSearchDecoder<float> helper1;
  beamnode_vector_list = helper1.PackTwoSteps(
      ids[0], scores[0], beamnode_vector_list, &sentence_vector_list);
  // Expect one finished sentence for source 0 and none for source 1.
  ASSERT_EQ(sentence_vector_list[0].size(), 1UL);
  ASSERT_EQ(sentence_vector_list[1].size(), 0UL);
  ASSERT_EQ(beamnode_vector_list[0].size(), 3UL);
  ASSERT_EQ(beamnode_vector_list[1].size(), 2UL);
 }
 // End-to-end check of PackAllSteps on three generated steps for two sources:
 // verifies both LoD levels of the packed id tensor, its contents, and that
 // each packed score equals the corresponding id (GenerateExample fills both
 // tensors from the same data).
 TEST(BeamSearchDecodeOp, PackAllSteps) {
  CPUPlace place;
  // we will construct a sample data with 3 steps and 2 source sentences
  LoDTensorArray ids;
  LoDTensorArray scores;
  paddle::test::GenerateExample(
      std::vector<size_t>{0, 3, 6}, std::vector<size_t>{0, 1, 2, 3, 4, 5, 6},
      std::vector<int>{1, 2, 3, 4, 5, 6}, &ids, &scores);
  paddle::test::GenerateExample(
      std::vector<size_t>{0, 3, 6}, std::vector<size_t>{0, 1, 1, 3, 5, 5, 6},
      std::vector<int>{0, 1, 2, 3, 4, 5}, &ids, &scores);
  paddle::test::GenerateExample(std::vector<size_t>{0, 3, 6},
                                std::vector<size_t>{0, 0, 1, 2, 3, 4, 5},
                                std::vector<int>{0, 1, 2, 3, 4}, &ids, &scores);
  ASSERT_EQ(ids.size(), 3UL);
  ASSERT_EQ(scores.size(), 3UL);
  BeamSearchDecoder<float> helper;
  LoDTensor id_tensor;
  LoDTensor score_tensor;
  helper.PackAllSteps(ids, scores, &id_tensor, &score_tensor);
  LoD lod = id_tensor.lod();
  // Level 0: offsets of the sentences belonging to each source.
  std::vector<size_t> expect_source_lod = {0, 4, 8};
  EXPECT_EQ(lod[0], expect_source_lod);
  // Level 1: offsets of the words belonging to each sentence.
  std::vector<size_t> expect_sentence_lod = {0, 1, 3, 6, 9, 10, 13, 16, 19};
  EXPECT_EQ(lod[1], expect_sentence_lod);
  // 2| 1, 0| 3, 1, 0| 3, 2, 1| 5| 4, 3, 2| 4, 4, 3| 6, 5, 4
  std::vector<int> expect_data = {2, 1, 0, 3, 1, 0, 3, 2, 1, 5,
                                  4, 3, 2, 4, 4, 3, 6, 5, 4};
  ASSERT_EQ(id_tensor.dims()[0], static_cast<int64_t>(expect_data.size()));
  for (size_t i = 0; i < expect_data.size(); ++i) {
    ASSERT_EQ(id_tensor.data<int64_t>()[i],
              static_cast<int64_t>(expect_data[i]));
  }
  // Scores must mirror the ids element by element.
  for (int64_t i = 0; i < id_tensor.dims()[0]; ++i) {
    ASSERT_EQ(score_tensor.data<float>()[i],
              static_cast<float>(id_tensor.data<int64_t>()[i]));
  }
 }
--- a/paddle/operators/bilinear_tensor_product_op.cc
+++ b/paddle/operators/bilinear_tensor_product_op.cc
@ -0,0 +1,159 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/operators/bilinear_tensor_product_op.h"
 namespace paddle {
 namespace operators {
 using framework::Tensor;
 // Forward operator of bilinear_tensor_product.
 //
 // InferShape validates the inputs and sets Out to
 // {x_dims[0], weight_dims[0]}:
 //   - X and Y must be 2-D and agree on their first dimension;
 //   - Weight must be 3-D with weight_dims[1] == x_dims[1] and
 //     weight_dims[2] == y_dims[1];
 //   - the optional Bias must be 2-D with shape {1, weight_dims[0]}.
 // Out shares X's LoD.
 class BilinearTensorProductOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Weight"),
                   "Input(Weight) should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
    auto x_dims = ctx->GetInputDim("X");
    auto y_dims = ctx->GetInputDim("Y");
    auto weight_dims = ctx->GetInputDim("Weight");
    PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "The input(X) must be a 2D Tensor.");
    PADDLE_ENFORCE_EQ(y_dims.size(), 2UL, "The input(Y) must be a 2D Tensor.");
    PADDLE_ENFORCE_EQ(weight_dims.size(), 3UL,
                      "The input(Weight) must be a 3D tensor.");
    PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0],
                      "The first dimension(batch_size) of input(X) must be "
                      "equal to the first dimension of the input(Y).");
    PADDLE_ENFORCE_EQ(x_dims[1], weight_dims[1],
                      "The second dimension of input(X) must be equal to "
                      "the second dimension of the input(Weight).");
    PADDLE_ENFORCE_EQ(y_dims[1], weight_dims[2],
                      "The second dimension of input(Y) must be equal to "
                      "the third dimension of the input(Weight).");
    if (ctx->HasInput("Bias")) {
      auto bias_dims = ctx->GetInputDim("Bias");
      // The check is on bias_dims[0], so the message names the 1st dimension.
      PADDLE_ENFORCE(bias_dims.size() == 2UL && bias_dims[0] == 1UL,
                     "The Input(Bias) must be a 2-D tensor with "
                     "the 1st dimension fixed to 1 (a row vector).");
      PADDLE_ENFORCE_EQ(bias_dims[1], weight_dims[0],
                        "The second dimension of input(Bias) must be equal "
                        "to the first dimension of the input(Weight).");
    }
    ctx->SetOutputDim("Out", {x_dims[0], weight_dims[0]});
    ctx->ShareLoD("X", /*->*/ "Out");
  }
 };
 // Declares the inputs (X, Y, Weight and the dispensable Bias), the output
 // Out, and the documentation comment of bilinear_tensor_product.
 class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  BilinearTensorProductOpMaker(framework::OpProto* proto,
                               framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The first input of bilinear_tensor_product operator.");
    AddInput("Y", "The second input of bilinear_tensor_product operator.");
    AddInput("Weight",
             "The learnable parameters of bilinear_tensor_product operator.");
    AddInput("Bias", "The learnable bias of bilinear_tensor_product operator.")
        .AsDispensable();
    AddOutput("Out", "The output of bilinear_tensor_product operator.");
    // The summation runs over its own index j, so that it does not clash
    // with the slice index i.
    AddComment(R"DOC(
 Bilinear Tensor Product operator.
 Given input X and Y, a 3D tensor weight, and bias. Each column of the
 output is computed by one slice i = 1, . . . , k of the tensor:
    M =  (X W_i) \cdot Y
    Out_i = \sum_j {M_j} + Bias_i
 )DOC");
  }
 };
 // Gradient operator of bilinear_tensor_product.
 //
 // InferShape checks Out@GRAD against X (batch size), Weight (first
 // dimension) and, when present, Bias (second dimension). Each gradient
 // output that exists then receives the dimensions of its forward input.
 class BilinearTensorProductOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Weight"),
                   "Input(Weight) should not be null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null.");
    auto x_dims = ctx->GetInputDim("X");
    auto y_dims = ctx->GetInputDim("Y");
    auto weight_dims = ctx->GetInputDim("Weight");
    auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
    PADDLE_ENFORCE_EQ(out_dims.size(), 2UL,
                      "The input(Out@GRAD) must be a 2D Tensor.");
    PADDLE_ENFORCE_EQ(
        x_dims[0], out_dims[0],
        "The first dimension(batch_size) of input(Out@GRAD) must be "
        "equal to the first dimension of the Input(X).");
    // The comparison uses weight_dims[0], so the message names the first
    // dimension of Weight.
    PADDLE_ENFORCE_EQ(
        weight_dims[0], out_dims[1],
        "The second dimension of input(Out@GRAD) must be equal to "
        "the first dimension of the Input(Weight).");
    if (ctx->HasInput("Bias")) {
      auto bias_dims = ctx->GetInputDim("Bias");
      PADDLE_ENFORCE_EQ(
          bias_dims[1], out_dims[1],
          "The second dimension of input(Out@GRAD) must be equal to "
          "the second dimension of the Input(Bias).");
      auto bias_grad_name = framework::GradVarName("Bias");
      if (ctx->HasOutput(bias_grad_name)) {
        ctx->SetOutputDim(bias_grad_name, bias_dims);
      }
    }
    auto x_grad_name = framework::GradVarName("X");
    auto y_grad_name = framework::GradVarName("Y");
    auto weight_grad_name = framework::GradVarName("Weight");
    // A gradient output may be absent; only set shapes for those present.
    if (ctx->HasOutput(x_grad_name)) {
      ctx->SetOutputDim(x_grad_name, x_dims);
    }
    if (ctx->HasOutput(y_grad_name)) {
      ctx->SetOutputDim(y_grad_name, y_dims);
    }
    if (ctx->HasOutput(weight_grad_name)) {
      ctx->SetOutputDim(weight_grad_name, weight_dims);
    }
  }
 };
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 // Register the forward op with its maker, and the gradient op under the
 // name bilinear_tensor_product_grad.
 REGISTER_OP(bilinear_tensor_product, ops::BilinearTensorProductOp,
            ops::BilinearTensorProductOpMaker, bilinear_tensor_product_grad,
            ops::BilinearTensorProductOpGrad);
 // CPU kernels for the forward op, instantiated for float and double.
 REGISTER_OP_CPU_KERNEL(
    bilinear_tensor_product,
    ops::BilinearTensorProductKernel<paddle::platform::CPUPlace, float>,
    ops::BilinearTensorProductKernel<paddle::platform::CPUPlace, double>);
 // CPU kernels for the gradient op, instantiated for float and double.
 REGISTER_OP_CPU_KERNEL(
    bilinear_tensor_product_grad,
    ops::BilinearTensorProductGradKernel<paddle::platform::CPUPlace, float>,
    ops::BilinearTensorProductGradKernel<paddle::platform::CPUPlace, double>);
--- a/paddle/operators/bilinear_tensor_product_op.cu
+++ b/paddle/operators/bilinear_tensor_product_op.cu
@ -0,0 +1,26 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #define EIGEN_USE_GPU
 #include "paddle/operators/bilinear_tensor_product_op.h"
 namespace ops = paddle::operators;
 // GPU kernels of the forward operator, instantiated for float and double.
 REGISTER_OP_GPU_KERNEL(
    bilinear_tensor_product,
    ops::BilinearTensorProductKernel<paddle::platform::GPUPlace, float>,
    ops::BilinearTensorProductKernel<paddle::platform::GPUPlace, double>);
 // GPU kernels of the gradient operator, instantiated for float and double.
 REGISTER_OP_GPU_KERNEL(
    bilinear_tensor_product_grad,
    ops::BilinearTensorProductGradKernel<paddle::platform::GPUPlace, float>,
    ops::BilinearTensorProductGradKernel<paddle::platform::GPUPlace, double>);
--- a/paddle/operators/bilinear_tensor_product_op.h
+++ b/paddle/operators/bilinear_tensor_product_op.h
@ -0,0 +1,184 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/operators/math/math_function.h"
 namespace paddle {
 namespace operators {
 // Bring framework::Tensor into this namespace for the kernels below.
 using framework::Tensor;
 // Local alias of framework::EigenMatrix that defaults to row-major layout and
 // Eigen::DenseIndex as the index type.
 template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 template <typename Place, typename T>
 class BilinearTensorProductKernel : public framework::OpKernel<T> {
 public:
  // Forward pass of the bilinear tensor product.
  //
  // For every slice Weight_i of Input(Weight) (i in [0, weight_dims[0])),
  // column i of Output(Out) is the row-wise sum of (X * Weight_i) multiplied
  // element-wise by Y. When Input(Bias) is present it is broadcast over the
  // batch dimension and added to the result.
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input_x = ctx.Input<Tensor>("X");
    auto* input_y = ctx.Input<Tensor>("Y");
    auto* input_w = ctx.Input<Tensor>("Weight");
    auto* input_b = ctx.Input<Tensor>("Bias");
    auto* result = ctx.Output<Tensor>("Out");
    result->mutable_data<T>(ctx.GetPlace());

    auto y_view = EigenMatrix<T>::From(*input_y);
    auto result_view = EigenMatrix<T>::From(*result);

    auto batch_size = input_x->dims()[0];
    auto w_dims = input_w->dims();
    int num_slices = w_dims[0];
    auto x_width = w_dims[1];
    auto y_width = w_dims[2];
    auto device = ctx.GetEigenDevice<Place>();

    // Scratch buffer of shape (batch_size, y_width) that holds X * Weight_i
    // for the slice currently being processed.
    Tensor xw;
    xw.mutable_data<T>(framework::make_ddim({batch_size, y_width}),
                       ctx.GetPlace());
    auto xw_view = EigenMatrix<T>::From(xw);

    for (int slice = 0; slice < num_slices; ++slice) {
      auto result_column = result_view.chip(slice, 1);
      // View slice `slice` of the weight as an (x_width, y_width) matrix.
      Tensor w_slice = input_w->Slice(slice, slice + 1)
                           .Resize(framework::make_ddim({x_width, y_width}));
      // xw = X * Weight_slice
      math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans,
                           batch_size, y_width, x_width, 1,
                           input_x->data<T>(), w_slice.data<T>(), 0,
                           xw.data<T>());
      // Reduce (xw .* Y) along dimension 1 into the output column.
      result_column.device(device) =
          (xw_view * y_view).sum(Eigen::DSizes<int, 1>(1));
    }

    if (input_b) {
      auto bias_view = EigenMatrix<T>::From(*input_b);
      // Repeat the bias batch_size times along dimension 0 before adding.
      Eigen::DSizes<int, 2> repeat_rows(batch_size, 1);
      result_view.device(device) =
          bias_view.broadcast(repeat_rows) + result_view;
    }
  }
 };
 // Backward kernel of the bilinear tensor product operator.
 //
 // Reads X, Y, Weight and Out@GRAD and fills whichever of X@GRAD, Y@GRAD,
 // Weight@GRAD and Bias@GRAD are present as outputs (each is checked for
 // null before use).
 template <typename Place, typename T>
 class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const Tensor* x = ctx.Input<Tensor>("X");
    const Tensor* y = ctx.Input<Tensor>("Y");
    const Tensor* weight = ctx.Input<Tensor>("Weight");
    Tensor* d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
    Tensor* d_y = ctx.Output<Tensor>(framework::GradVarName("Y"));
    Tensor* d_weight = ctx.Output<Tensor>(framework::GradVarName("Weight"));
    Tensor* d_bias = ctx.Output<Tensor>(framework::GradVarName("Bias"));
    const Tensor* d_out = ctx.Input<Tensor>(framework::GradVarName("Out"));
    // Sizes are taken from X (batch) and from the three dims of Weight.
    auto batch_size = x->dims()[0];
    auto weight_dims = weight->dims();
    int out_dim = weight_dims[0];
    auto x_dim = weight_dims[1];
    auto y_dim = weight_dims[2];
    auto x_mat = EigenMatrix<T>::From(*x);
    auto y_mat = EigenMatrix<T>::From(*y);
    auto d_out_mat = EigenMatrix<T>::From(*d_out);
    auto place = ctx.GetEigenDevice<Place>();
    // Create the intermediate variable to calculate the Output(Y@Grad).
    // It has shape (batch_size, x_dim) and is also reused below when
    // computing the gradient of Input(Weight).
    Tensor x_scale;
    x_scale.mutable_data<T>(framework::make_ddim({batch_size, x_dim}),
                            ctx.GetPlace());
    auto x_scale_mat = EigenMatrix<T>::From(x_scale);
    // Create the intermediate variable to calculate the Output(X@Grad).
    // It has shape (batch_size, y_dim).
    Tensor y_scale;
    y_scale.mutable_data<T>(framework::make_ddim({batch_size, y_dim}),
                            ctx.GetPlace());
    auto y_scale_mat = EigenMatrix<T>::From(y_scale);
    math::SetConstant<Place, T> set_zero;
    // Set Output(X@Grad) be zero, because the gemm calls below accumulate
    // into it (they pass 1 as the scale of the existing output).
    if (d_x) {
      d_x->mutable_data<T>(ctx.GetPlace());
      set_zero(ctx.device_context(), d_x, static_cast<T>(0));
    }
    // Set Output(Y@Grad) be zero, for the same accumulation reason.
    if (d_y) {
      d_y->mutable_data<T>(ctx.GetPlace());
      set_zero(ctx.device_context(), d_y, static_cast<T>(0));
    }
    // Calculate the Output(X@Grad) and Output(Y@Grad).
    // NOTE(review): the formulas in the comments below assume math::gemm
    // follows the BLAS argument order
    // (transA, transB, M, N, K, alpha, A, B, beta, C) -- confirm.
    if (d_x || d_y) {
      Eigen::DSizes<int, 2> bcast_for_x(1, y_dim);
      Eigen::DSizes<int, 2> bcast_for_y(1, x_dim);
      for (int i = 0; i < out_dim; ++i) {
        // View slice i of the weight as an (x_dim, y_dim) matrix.
        Tensor weight_i = weight->Slice(i, i + 1).Resize(
            framework::make_ddim({x_dim, y_dim}));
        // Column i of Out@GRAD.
        auto output_vec = d_out_mat.chip(i, 1);
        if (d_x) {
          // y_scale = Y scaled row-wise by column i of Out@GRAD.
          y_scale_mat.device(place) =
              output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
                  .broadcast(bcast_for_x) *
              y_mat;
          // d_x += y_scale * weight_i^T
          math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasTrans,
                               batch_size, x_dim, y_dim, 1, y_scale.data<T>(),
                               weight_i.data<T>(), 1, d_x->data<T>());
        }
        if (d_y) {
          // x_scale = X scaled row-wise by column i of Out@GRAD.
          x_scale_mat.device(place) =
              output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
                  .broadcast(bcast_for_y) *
              x_mat;
          // d_y += x_scale * weight_i
          math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans,
                               batch_size, y_dim, x_dim, 1, x_scale.data<T>(),
                               weight_i.data<T>(), 1, d_y->data<T>());
        }
      }
    }
    // Calculate the gradient of Input(Weight).
    if (d_weight) {
      d_weight->mutable_data<T>(ctx.GetPlace());
      Eigen::DSizes<int, 2> bcast_for_weight(1, x_dim);
      for (int i = 0; i < out_dim; ++i) {
        // Writable (x_dim, y_dim) view of slice i of Weight@GRAD.
        Tensor d_weight_i = d_weight->Slice(i, i + 1).Resize(
            framework::make_ddim({x_dim, y_dim}));
        auto output_vec = d_out_mat.chip(i, 1);
        // x_scale = X scaled row-wise by column i of Out@GRAD.
        x_scale_mat.device(place) =
            output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
                .broadcast(bcast_for_weight) *
            x_mat;
        // d_weight_i = x_scale^T * Y (overwrites; the output scale is 0).
        math::gemm<Place, T>(ctx.device_context(), CblasTrans, CblasNoTrans,
                             x_dim, y_dim, batch_size, 1, x_scale.data<T>(),
                             y->data<T>(), 0, d_weight_i.data<T>());
      }
    }
    // Calculate the gradient of Input(Bias): Out@GRAD summed over dimension 0.
    if (d_bias) {
      d_bias->mutable_data<T>(ctx.GetPlace());
      auto d_bias_mat = EigenMatrix<T>::From(*d_bias);
      d_bias_mat.device(place) = d_out_mat.sum(Eigen::DSizes<int, 1>(0));
    }
  }
 };
 }  // namespace operators
 }  // namespace paddle
--- a/paddle/operators/conditional_block_op.cc
+++ b/paddle/operators/conditional_block_op.cc
@ -0,0 +1,197 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
   http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
 #include <algorithm>
 #include "paddle/framework/executor.h"
 #include "paddle/framework/op_registry.h"
 namespace paddle {
 namespace operators {
 class ConditionalOp : public framework::OperatorBase {
 public:
  ConditionalOp(const std::string &type,
                const framework::VariableNameMap &inputs,
                const framework::VariableNameMap &outputs,
                const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

 protected:
  // Looks up every variable listed under Input("X") in `scope` and returns
  // pointers to their LoDTensors, in the same order as the input names.
  // Fails via PADDLE_ENFORCE when a variable cannot be found.
  std::vector<const framework::LoDTensor *> InputTensors(
      const framework::Scope &scope) const {
    const auto &cond_names = Inputs("X");
    std::vector<const framework::LoDTensor *> tensors;
    tensors.reserve(cond_names.size());
    for (const std::string &var_name : cond_names) {
      auto *var = scope.FindVar(var_name);
      PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", var_name);
      tensors.push_back(&var->Get<framework::LoDTensor>());
    }
    return tensors;
  }
 };
 class ConditionalBlockOp : public ConditionalOp {
 public:
  ConditionalBlockOp(const std::string &type,
                     const framework::VariableNameMap &inputs,
                     const framework::VariableNameMap &outputs,
                     const framework::AttributeMap &attrs)
      : ConditionalOp(type, inputs, outputs, attrs) {}

  // Executes the "block" attribute in a freshly created child scope, but only
  // when none of the condition tensors (Input("X")) is empty. The child scope
  // is stored in the "Scope" output variable as a one-element vector.
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    auto cond_tensors = InputTensors(scope);
    const bool any_empty = std::any_of(
        cond_tensors.begin(), cond_tensors.end(),
        [](const framework::LoDTensor *t) { return t->numel() == 0; });
    if (any_empty) {
      return;
    }

    auto *scope_var = scope.FindVar(Output("Scope"));
    PADDLE_ENFORCE(scope_var != nullptr, "Must set scope");
    auto *step_scopes =
        scope_var->GetMutable<std::vector<framework::Scope *>>();
    step_scopes->resize(1);
    (*step_scopes)[0] = &scope.NewScope();
    auto &child_scope = *(*step_scopes)[0];

    auto *sub_block = Attr<framework::BlockDescBind *>("block");
    framework::Executor executor(dev_ctx);
    executor.Run(*sub_block->Program(), &child_scope, sub_block->ID(), false);
  }
 };
 // Declares the inputs, outputs, attributes and documentation string of the
 // conditional block operator.
 class ConditionalBlockOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  ConditionalBlockOpProtoMaker(framework::OpProto *proto,
                               framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    // "X", "Params" and "Out" are all marked duplicable, i.e. each slot may
    // name several variables.
    AddInput("X",
             "The conditional variable of this operator. If X is empty, the "
             "whole sub-block will not be executed.")
        .AsDuplicable();
    AddInput("Params", "The input variables of the sub-block.").AsDuplicable();
    AddOutput("Out", "The output variables of the sub-block.").AsDuplicable();
    AddOutput("Scope",
              "(std::vector<Scope*>) The step scope of conditional block. To "
              "unify the conditional block, rnn and while op, the type of "
              "scope is std::vector<Scope*>");
    // The sub-block to execute is passed as a BlockDescBind pointer attribute.
    AddAttr<framework::BlockDescBind *>(
        "block", "The step block of conditional block operator");
    AddComment(R"DOC(Conditional block operator
 Run the sub-block if X is not empty. Params is the other inputs and Out is the
 outputs of the sub-block.
 )DOC");
  }
 };
 class ConditionalBlockGradOp : public ConditionalOp {
 public:
  ConditionalBlockGradOp(const std::string &type,
                         const framework::VariableNameMap &inputs,
                         const framework::VariableNameMap &outputs,
                         const framework::AttributeMap &attrs)
      : ConditionalOp(type, inputs, outputs, attrs) {}

  // Executes the gradient sub-block (the "block" attribute) inside the scope
  // recorded in Input("Scope"), then copies the gradients produced there for
  // "Params" and "X" to the corresponding gradient output variables.
  // Nothing is executed when any of the condition tensors (Input("X")) is
  // empty.
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    auto xs = this->InputTensors(scope);
    bool need_run = std::all_of(
        xs.begin(), xs.end(),
        [](const framework::LoDTensor *t) { return t->numel() != 0; });
    if (!need_run) {
      return;
    }

    auto *scope_var = scope.FindVar(Input("Scope"));
    PADDLE_ENFORCE(scope_var != nullptr, "Must set scope");
    auto &scopes = scope_var->Get<std::vector<framework::Scope *>>();
    framework::Scope &cur_scope = *scopes[0];

    auto *block = Attr<framework::BlockDescBind *>("block");
    framework::Executor exec(dev_ctx);
    exec.Run(*block->Program(), &cur_scope, block->ID(), false);

    AssignLocalGradientToGlobal(dev_ctx, cur_scope, Inputs("Params"),
                                Outputs(framework::GradVarName("Params")));
    AssignLocalGradientToGlobal(dev_ctx, cur_scope, Inputs("X"),
                                Outputs(framework::GradVarName("X")));
  }

 private:
  // For each forward variable name p_names[i], assigns the gradient variable
  // found in `cur_scope` (if any) to the output variable pg_names[i] by
  // running an "assign" operator.
  //
  // p_names and pg_names are matched by index, so they must have the same
  // length; this is enforced up front because indexing pg_names with an
  // index derived from p_names would otherwise read out of bounds.
  void AssignLocalGradientToGlobal(
      const platform::DeviceContext &dev_ctx, const framework::Scope &cur_scope,
      const std::vector<std::string> &p_names,
      const std::vector<std::string> &pg_names) const {
    PADDLE_ENFORCE(p_names.size() == pg_names.size(),
                   "The number of gradient outputs must be equal to the "
                   "number of forward inputs.");
    for (size_t i = 0; i < p_names.size(); ++i) {
      const auto &out_grad_name = pg_names[i];
      auto in_grad_name = framework::GradVarName(p_names[i]);
      auto *in_var = cur_scope.FindVar(in_grad_name);
      if (in_var == nullptr) {
        // The sub-block produced no gradient for this variable.
        continue;
      }
      // Temporarily rename the local gradient before running "assign" and
      // restore its original name afterwards.
      auto new_in_grad_name = cur_scope.Rename(in_grad_name);
      auto assign =
          framework::OpRegistry::CreateOp("assign", {{"X", {new_in_grad_name}}},
                                          {{"Out", {out_grad_name}}}, {});
      assign->Run(cur_scope, dev_ctx);
      cur_scope.Rename(new_in_grad_name, in_grad_name);
    }
  }
 };
 class ConditionalBlockGradInferShape : public framework::InferShapeBase {
 public:
  // Gives every gradient output the dimensions of its forward input:
  // Params@GRAD takes the dims of Params (only when Params is present) and
  // X@GRAD takes the dims of X.
  void operator()(framework::InferShapeContext *context) const override {
    const auto x_grad = framework::GradVarName("X");
    const auto params_grad = framework::GradVarName("Params");

    PADDLE_ENFORCE(context->HasInputs("X"));

    if (context->HasInputs("Params")) {
      PADDLE_ENFORCE(context->HasOutputs(params_grad));
      context->SetOutputsDim(params_grad, context->GetInputsDim("Params"));
    }

    PADDLE_ENFORCE(context->HasOutputs(x_grad));
    context->SetOutputsDim(x_grad, context->GetInputsDim("X"));
  }
 };
 class ConditionalBlockGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  // Builds the description of the "conditional_block_grad" operator from the
  // forward operator: it forwards X, Params, Out, Out@GRAD and Scope as
  // inputs, declares X@GRAD and Params@GRAD as outputs, and uses the first
  // gradient block as its "block" attribute.
  std::unique_ptr<framework::OpDescBind> Apply() const override {
    // Own the description from the start so it is released if any of the
    // setters below throws.
    std::unique_ptr<framework::OpDescBind> grad_op(new framework::OpDescBind());
    grad_op->SetType("conditional_block_grad");
    grad_op->SetInput("X", Input("X"));
    grad_op->SetInput("Params", Input("Params"));
    grad_op->SetInput("Out", Output("Out"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetInput("Scope", Output("Scope"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetOutput(framework::GradVarName("Params"), InputGrad("Params"));
    grad_op->SetBlockAttr("block", *this->grad_block_[0]);
    return grad_op;
  }
 };
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 // Forward operator: registered with its proto maker and its gradient-op
 // description maker.
 REGISTER_OPERATOR(conditional_block, ops::ConditionalBlockOp,
                  ops::ConditionalBlockOpProtoMaker,
                  ops::ConditionalBlockGradMaker);
 // Gradient operator: registered with its shape-inference functor.
 REGISTER_OPERATOR(conditional_block_grad, ops::ConditionalBlockGradOp,
                  ops::ConditionalBlockGradInferShape);
--- a/paddle/operators/l1_norm_op.h
+++ b/paddle/operators/l1_norm_op.h
@ -29,7 +29,7 @@ class L1NormKernel : public framework::OpKernel<T> {
    Out->mutable_data<T>(context.GetPlace());
    auto x = framework::EigenVector<T>::Flatten(*X);
-    auto out = framework::EigenVector<T>::Flatten(*Out);
+    auto out = framework::EigenScalar<T>::From(*Out);
    auto place = context.GetEigenDevice<Place>();
    out.device(place) = x.abs().sum();
--- a/paddle/operators/lod_reset_op.cc
+++ b/paddle/operators/lod_reset_op.cc
@ -0,0 +1,120 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
       http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
 #include "paddle/operators/lod_reset_op.h"
 namespace paddle {
 namespace operators {
 class LoDResetOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Checks that Input(X) and Output(Out) exist and that a target LoD is
  // available, then gives Output(Out) the same dims as Input(X).
  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of LoDResetOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of LoDResetOp should not be null.");

    // If the target LoD is not set from Input(), then it must be set from
    // Attr().
    const bool lod_from_input = ctx->HasInput("TargetLoD");
    if (!lod_from_input) {
      const auto &attr_lod = ctx->Attrs().Get<std::vector<int>>("target_lod");
      PADDLE_ENFORCE(attr_lod.size() > 1,
                     "Target LoD is not found, should be set to be a valid one "
                     "through Input() or Attr().");
    }

    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
  }

 protected:
  // The kernel is selected by the data type of Input(X) and the device
  // context of the execution.
  framework::OpKernelType GetKernelType(
      const framework::ExecutionContext &ctx) const override {
    const auto *x = ctx.Input<framework::LoDTensor>("X");
    return framework::OpKernelType(framework::ToDataType(x->type()),
                                   ctx.device_context());
  }
 };
 // Declares the inputs, output, attribute and documentation string of the
 // lod_reset operator.
 class LoDResetOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  LoDResetOpMaker(framework::OpProto *proto,
                  framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "(LoDTensor) The input tensor of lod_reset operator.");
    // Optional input: the target LoD may instead come from the attribute.
    AddInput("TargetLoD",
             "(Tensor, optional) The target level 0 LoD from Input().")
        .AsDispensable();
    AddOutput("Out", "(LoDTensor) The output tensor of lod_reset operator.");
    // Defaults to an empty vector.
    AddAttr<std::vector<int>>("target_lod",
                              "The target level 0 LoD from Attr().")
        .SetDefault(std::vector<int>{});
    AddComment(R"DOC(LoDReset operator
 Reset LoD of Input(X) into a new one specified by Input(TargetLoD) or
 Attr(target_lod), or set LoD for Input(X) if it doesn't have one.
 Currently the lod_reset operator only supports the reset of level 0 LoD.
 At least one of Input(TargetLoD) and Attr(target_lod) must be set,
 and if both of them are set, Input(TargetLoD) will be chosen as the
 target LoD.
 An example:
 Given a float LoDTensor X with shape (6, 1), its transpose form represents
    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
 with LoD = [[0, 2, 5, 6]] and the three (transposed) sequences look like
    [1.0, 2.0], [3.0, 4.0, 5.0], [6.0].
 If target LoD = [0, 4, 6], the lod_reset operator will reset the LoD and
 the sequences that the LoDTensor Output(Out) contains becomes:
    [1.0, 2.0, 3.0, 4.0], [5.0, 6.0].
 )DOC");
  }
 };
 class LoDResetGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Requires Input(X) and Input(Out@GRAD), and gives Output(X@GRAD) the same
  // dims as Input(X).
  void InferShape(framework::InferShapeContext *ctx) const override {
    const auto out_grad = framework::GradVarName("Out");
    const auto x_grad = framework::GradVarName("X");

    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput(out_grad),
                   "Input(Out@GRAD) shouldn't be null.");

    ctx->SetOutputDim(x_grad, ctx->GetInputDim("X"));
  }

 protected:
  // The kernel is selected by the data type of Input(X) and the device
  // context of the execution.
  framework::OpKernelType GetKernelType(
      const framework::ExecutionContext &ctx) const override {
    const auto *x = ctx.Input<framework::LoDTensor>("X");
    return framework::OpKernelType(framework::ToDataType(x->type()),
                                   ctx.device_context());
  }
 };
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 // Register lod_reset together with its proto maker and its gradient operator
 // (lod_reset_grad).
 REGISTER_OP(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker, lod_reset_grad,
            ops::LoDResetGradOp);
 // CPU kernels of the forward operator, instantiated for float and double.
 REGISTER_OP_CPU_KERNEL(lod_reset,
                       ops::LoDResetKernel<paddle::platform::CPUPlace, float>,
                       ops::LoDResetKernel<paddle::platform::CPUPlace, double>);
 // CPU kernels of the gradient operator, instantiated for float and double.
 REGISTER_OP_CPU_KERNEL(
    lod_reset_grad, ops::LoDResetGradKernel<paddle::platform::CPUPlace, float>,
    ops::LoDResetGradKernel<paddle::platform::CPUPlace, double>);
--- a/paddle/operators/lod_reset_op.cu
+++ b/paddle/operators/lod_reset_op.cu
@ -0,0 +1,24 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
   http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
 #include "paddle/operators/lod_reset_op.h"
 namespace ops = paddle::operators;
 // GPU kernels of the forward operator, instantiated for float and double.
 REGISTER_OP_GPU_KERNEL(lod_reset,
                       ops::LoDResetKernel<paddle::platform::GPUPlace, float>,
                       ops::LoDResetKernel<paddle::platform::GPUPlace, double>);
 // GPU kernels of the gradient operator, instantiated for float and double.
 REGISTER_OP_GPU_KERNEL(
    lod_reset_grad, ops::LoDResetGradKernel<paddle::platform::GPUPlace, float>,
    ops::LoDResetGradKernel<paddle::platform::GPUPlace, double>);
--- a/paddle/operators/lod_reset_op.h
+++ b/paddle/operators/lod_reset_op.h
@ -0,0 +1,78 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
       http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
 #pragma once
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
 namespace paddle {
 namespace operators {
 template <typename Place, typename T>
 class LoDResetKernel : public framework::OpKernel<T> {
 public:
  // Makes Output(Out) share the data of Input(X) and gives it a new level 0
  // LoD.
  //
  // The target LoD is read from Input(TargetLoD) when that input is present,
  // otherwise from Attr(target_lod). It must contain more than one entry,
  // start at 0, be strictly ascending and end at the first dimension of
  // Input(X); each condition is checked with PADDLE_ENFORCE.
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* out = ctx.Output<framework::LoDTensor>("Out");
    auto* in = ctx.Input<framework::LoDTensor>("X");
    auto* lod_t = ctx.Input<framework::Tensor>("TargetLoD");

    std::vector<int> level0;
    if (lod_t) {
      // The host-side copy is declared in this scope so that the pointer
      // taken from it stays valid until level0 has been filled. Declaring it
      // inside the inner `if` would destroy it before the data is read.
      framework::Tensor lod_cpu;
      auto* lod = lod_t->data<int>();
      if (platform::is_gpu_place(ctx.GetPlace())) {
        lod_cpu.CopyFrom(*lod_t, platform::CPUPlace(), ctx.device_context());
        // NOTE(review): if CopyFrom is asynchronous with respect to the
        // device context, a synchronization is needed before reading the
        // host buffer below -- confirm.
        lod = lod_cpu.data<int>();
      }
      level0.assign(lod, lod + lod_t->numel());
    } else {
      level0 = ctx.Attr<std::vector<int>>("target_lod");
    }

    PADDLE_ENFORCE(level0.size() > 1UL,
                   "The size of target LoD should be greater than 1.");
    PADDLE_ENFORCE(level0[0] == 0,
                   "Target LoD should be a vector starting from 0.");
    PADDLE_ENFORCE(level0.back() == in->dims()[0],
                   "Target LoD should be a vector end with the "
                   "first dimension of Input(X).");
    // level0.size() > 1 was enforced above, so size() - 1 cannot wrap around.
    for (size_t i = 0; i < level0.size() - 1; ++i) {
      PADDLE_ENFORCE(level0[i + 1] > level0[i],
                     "Target LoD should be an ascending vector.");
    }

    out->ShareDataWith(*in);

    // Convert the offsets to size_t; they are non-negative because the vector
    // starts at 0 and is strictly ascending.
    std::vector<size_t> ulevel0(level0.begin(), level0.end());
    framework::LoD target_lod;
    target_lod.push_back(ulevel0);
    out->set_lod(target_lod);
  }
 };
 template <typename Place, typename T>
 class LoDResetGradKernel : public framework::OpKernel<T> {
 public:
  // Makes Output(X@GRAD) share the data of Input(Out@GRAD).
  // Marked `override` like the other kernels so a signature mismatch with
  // the base class is caught at compile time.
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
    d_x->ShareDataWith(*d_out);
  }
 };
 }  // namespace operators
 }  // namespace paddle
--- a/paddle/operators/matmul_op.h
+++ b/paddle/operators/matmul_op.h
@ -74,11 +74,10 @@ Tensor CombineBatchAndN(const framework::ExecutionContext& context,
  Tensor output;
  auto in_dims = input.dims();
  if (in_dims.size() == 3) {
-    output.Resize(in_dims);
+    output.Resize({in_dims[1], in_dims[0], in_dims[2]});
    output.mutable_data<T>(context.GetPlace());
    EigenTranspose<Place, T, 3>(context, input, output, {1, 0, 2});
-    std::vector<int64_t> out_dims = {in_dims[1], in_dims[0] * in_dims[2]};
+    output.Resize({in_dims[1], in_dims[0] * in_dims[2]});
    output.Resize(make_ddim(out_dims));
  } else {
    output.ShareDataWith(input);
  }
--- a/Show More
+++ b/Show More