Using LoDTensor instead of Tensor in every operator.

8 years ago · f299206396
parent d11430e009
commit f299206396
33 changed files with 409 additions and 146 deletions
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@ -59,7 +59,7 @@ class LoDTensor : public Tensor {
  void set_lod(const LoD& lod) { lod_ = lod; }
-  LoD lod() { return lod_; }
+  LoD lod() const { return lod_; }
  /*
   * Get a element from LoD.
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@ -186,6 +186,54 @@ void OperatorBase::GenerateTemporaryNames() {
  }
 }
 // Specialization of Input<T> for plain Tensor. The variable may hold either a
 // LoDTensor or a Tensor; in both cases a pointer to it is returned as a
 // const Tensor*. Returns nullptr when InputVar(name) yields no variable.
 template <>
 const Tensor* InferShapeContext::Input<Tensor>(const std::string& name) const {
  auto* var = InputVar(name);
  if (var == nullptr) return nullptr;
  if (var->IsType<LoDTensor>()) {
    return &var->Get<LoDTensor>();
  }
  // The message carries a %s placeholder, so the slot name has to be supplied
  // as the format argument.
  PADDLE_ENFORCE(var->IsType<Tensor>(),
                 "The Input(%s) must be LoDTensor or Tensor.", name);
  return &var->Get<Tensor>();
 }
 // Specialization of MultiInput<T> for plain Tensor: every name listed by
 // op().Inputs(name) is resolved through Input<Tensor>, and the resulting
 // pointers are returned in the same order.
 // NOTE(review): Input<Tensor> is called with the entries of op().Inputs(name)
 // rather than with the slot name itself -- confirm InputVar accepts those.
 template <>
 const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
    const std::string& name) const {
  const auto& arg_names = op().Inputs(name);
  std::vector<const Tensor*> tensors;
  tensors.reserve(arg_names.size());
  for (const std::string& arg_name : arg_names) {
    tensors.push_back(Input<Tensor>(arg_name));
  }
  return tensors;
 }
 // Specialization of Output<T> for plain Tensor. The variable may hold either
 // a LoDTensor or a Tensor; it is read through Variable::Get and constness is
 // cast away so the caller can write to it. Returns nullptr when
 // OutputVar(name) yields no variable.
 template <>
 Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
  auto* var = OutputVar(name);
  if (var == nullptr) return nullptr;
  if (var->IsType<LoDTensor>()) {
    return const_cast<LoDTensor*>(&var->Get<LoDTensor>());
  }
  // This accessor checks an output slot, so the message says Output and
  // receives the slot name for its %s placeholder.
  PADDLE_ENFORCE(var->IsType<Tensor>(),
                 "The Output(%s) must be LoDTensor or Tensor.", name);
  return const_cast<Tensor*>(&var->Get<Tensor>());
 }
 // Specialization of MultiOutput<T> for plain Tensor: every name listed by
 // op().Outputs(name) is resolved through Output<Tensor>, and the resulting
 // pointers are returned in the same order.
 // NOTE(review): Output<Tensor> is called with the entries of
 // op().Outputs(name) rather than with the slot name itself -- confirm
 // OutputVar accepts those.
 template <>
 std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
    const std::string& name) const {
  const auto& arg_names = op().Outputs(name);
  std::vector<Tensor*> tensors;
  tensors.reserve(arg_names.size());
  for (const std::string& arg_name : arg_names) {
    tensors.push_back(Output<Tensor>(arg_name));
  }
  return tensors;
 }
 void OpProtoAndCheckerMaker::Validate() {
  validated_ = true;
  CheckNoDuplicatedInOutAttrs();
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@ -22,6 +22,7 @@ limitations under the License. */
 #include "op_info.h"
 #include "paddle/framework/attribute.h"
 #include "paddle/framework/framework.pb.h"
 #include "paddle/framework/lod_tensor.h"
 #include "paddle/framework/scope.h"
 #include "paddle/framework/tensor.h"
 #include "paddle/platform/device_context.h"
@ -305,11 +306,9 @@ class InferShapeContext {
    auto names = op_.Inputs(name);
    std::vector<const T*> res;
    res.reserve(names.size());
-    std::transform(names.begin(), names.end(), std::back_inserter(res),
+    std::transform(
-                   [&](const std::string& sub_name) {
+        names.begin(), names.end(), std::back_inserter(res),
-                     auto var = scope_.FindVar(sub_name);
+        [&](const std::string& sub_name) { return Input<T>(sub_name); });
                     return var == nullptr ? nullptr : &var->Get<T>();
                   });
    return res;
  }
@ -318,11 +317,9 @@ class InferShapeContext {
    auto names = op_.Outputs(name);
    std::vector<T*> res;
    res.reserve(names.size());
-    std::transform(names.begin(), names.end(), std::back_inserter(res),
+    std::transform(
-                   [&](const std::string& sub_name) {
+        names.begin(), names.end(), std::back_inserter(res),
-                     auto var = scope_.FindVar(sub_name);
+        [&](const std::string& sub_name) { return Output<T>(sub_name); });
                     return var == nullptr ? nullptr : var->GetMutable<T>();
                   });
    return res;
  }
@ -363,6 +360,27 @@ class ExecutionContext : public InferShapeContext {
    return device_context_;
  }
  // Redefinition of Output for the execution stage: the variable is read
  // through Variable::Get (not Variable::GetMutable) and constness is cast
  // away. Yields nullptr when OutputVar(name) finds no variable.
  template <typename T>
  T* Output(const std::string& name) const {
    auto* out_var = OutputVar(name);
    if (out_var == nullptr) {
      return nullptr;
    }
    return const_cast<T*>(&out_var->Get<T>());
  }
  // redefine MultiOutput function.
  // use Variable::Get instead of Variable::GetMutable
  template <typename T>
  std::vector<T*> MultiOutput(const std::string& name) const {
    auto names = op().Outputs(name);
    std::vector<T*> res;
    res.reserve(names.size());
    std::transform(
        names.begin(), names.end(), std::back_inserter(res),
        [&](const std::string& sub_name) { return Output<T>(sub_name); });
    return res;
  }
  const platform::DeviceContext* device_context_;
 };
--- a/paddle/framework/tensor_impl.h
+++ b/paddle/framework/tensor_impl.h
@ -16,8 +16,6 @@ limitations under the License. */
 #include "paddle/memory/memcpy.h"
 #include "paddle/platform/enforce.h"
 #include <glog/logging.h>
 namespace paddle {
 namespace framework {
@ -55,7 +53,6 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
 template <typename T>
 inline T* Tensor::mutable_data(platform::Place place) {
  LOG(INFO) << "------ mutable_data ----  ";
  static_assert(std::is_pod<T>::value, "T must be POD");
  PADDLE_ENFORCE_GT(numel(), 0,
                    "Tensor's numel must be larger than zero to call "
@ -145,7 +142,6 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
 }
 inline Tensor& Tensor::Resize(const DDim& dims) {
  LOG(INFO) << "---- resize -----";
  dims_ = dims;
  numel_ = product(dims_);
  return *this;
--- a/paddle/operators/add_op.cc
+++ b/paddle/operators/add_op.cc
@ -26,7 +26,8 @@ class AddOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("X")->dims(),
                      ctx.Input<Tensor>("Y")->dims(),
                      "Two input of Add Op's dimension must be same.");
-    ctx.Output<Tensor>("Out")->Resize(ctx.Input<Tensor>("X")->dims());
+    ctx.Output<framework::LoDTensor>("Out")->Resize(
        ctx.Input<Tensor>("X")->dims());
  }
 };
--- a/paddle/operators/concat_op.cc
+++ b/paddle/operators/concat_op.cc
@ -26,7 +26,7 @@ class ConcatOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
    auto ins = ctx.MultiInput<framework::Tensor>("X");
-    auto *out = ctx.Output<framework::Tensor>("Out");
+    auto *out = ctx.Output<framework::LoDTensor>("Out");
    size_t axis = static_cast<size_t>(ctx.Attr<int>("axis"));
    size_t n = ins.size();
--- a/paddle/operators/cos_sim_op.cc
+++ b/paddle/operators/cos_sim_op.cc
@ -32,9 +32,9 @@ class CosSimOp : public framework::OperatorWithKernel {
                      "Dimensions of Input(X) and Input(Y) must be the same.");
    auto dims = ctx.Input<Tensor>("X")->dims();
-    ctx.Output<Tensor>("Out")->Resize({dims[0], 1});
+    ctx.Output<framework::LoDTensor>("Out")->Resize({dims[0], 1});
-    ctx.Output<Tensor>("XNorm")->Resize({dims[0], 1});
+    ctx.Output<framework::LoDTensor>("XNorm")->Resize({dims[0], 1});
-    ctx.Output<Tensor>("YNorm")->Resize({dims[0], 1});
+    ctx.Output<framework::LoDTensor>("YNorm")->Resize({dims[0], 1});
  }
 };
@ -88,8 +88,10 @@ class CosSimOpGrad : public framework::OperatorWithKernel {
                      "1st dimension of Out@GRAD must equal that of Input(X)");
    PADDLE_ENFORCE_EQ(out_dims[1], 1, "1st dimension of Out@GRAD must be one.");
-    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto *x_grad =
-    auto *y_grad = ctx.Output<Tensor>(framework::GradVarName("Y"));
+        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
    auto *y_grad =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("Y"));
    if (x_grad) x_grad->Resize(x_dims);
    if (y_grad) y_grad->Resize(y_dims);
  }
--- a/paddle/operators/cross_entropy_op.cc
+++ b/paddle/operators/cross_entropy_op.cc
@ -29,7 +29,7 @@ class OnehotCrossEntropyOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE_EQ(X->dims().size(), 2, "X's dimension must be 2.");
    PADDLE_ENFORCE_EQ(label->dims().size(), 1, "label's dimension must be 1.");
    PADDLE_ENFORCE_EQ(X->dims()[0], label->dims()[0]);
-    ctx.Output<Tensor>("Y")->Resize({X->dims()[0]});
+    ctx.Output<framework::LoDTensor>("Y")->Resize({X->dims()[0]});
  }
 };
@ -39,7 +39,7 @@ class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
-    auto dX = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto dX = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
    auto X = ctx.Input<Tensor>("X");
    dX->Resize(X->dims());
--- a/paddle/operators/fill_zeros_like_op.cc
+++ b/paddle/operators/fill_zeros_like_op.cc
@ -23,7 +23,7 @@ class FillZerosLikeOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
-    ctx.Output<framework::Tensor>("Dst")->Resize(
+    ctx.Output<framework::LoDTensor>("Dst")->Resize(
        ctx.Input<framework::Tensor>("Src")->dims());
  }
 };
--- a/paddle/operators/gather_op.cc
+++ b/paddle/operators/gather_op.cc
@ -28,7 +28,7 @@ class GatherOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >0");
    framework::DDim output_dims(ctx.Input<Tensor>("X")->dims());
    output_dims[0] = batch_size;
-    ctx.Output<Tensor>("Out")->Resize(output_dims);
+    ctx.Output<framework::LoDTensor>("Out")->Resize(output_dims);
  }
 };
@ -38,7 +38,7 @@ class GatherGradOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
-    auto X_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto X_grad = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
    auto X = ctx.Input<Tensor>("X");
    X_grad->Resize(X->dims());
--- a/paddle/operators/gaussian_random_op.cc
+++ b/paddle/operators/gaussian_random_op.cc
@ -44,7 +44,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext& context) const override {
-    auto* tensor = context.Output<framework::Tensor>("Out");
+    auto* tensor = context.Output<framework::LoDTensor>("Out");
    auto dims = Attr<std::vector<int>>("dims");
    std::vector<int64_t> temp;
    temp.reserve(dims.size());
--- a/paddle/operators/lookup_table_op.cc
+++ b/paddle/operators/lookup_table_op.cc
@ -25,7 +25,7 @@ class LookupTableOp : public framework::OperatorWithKernel {
  void InferShape(const framework::InferShapeContext &context) const override {
    auto table_t = context.Input<Tensor>("W");
    auto ids_t = context.Input<Tensor>("Ids");
-    auto output_t = context.Output<Tensor>("Out");
+    auto output_t = context.Output<framework::LoDTensor>("Out");
    output_t->Resize({ids_t->dims()[0], table_t->dims()[1]});
  }
@ -56,7 +56,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &context) const override {
    auto table = context.Input<Tensor>("W");
-    auto d_table = context.Output<Tensor>(framework::GradVarName("W"));
+    auto d_table =
        context.Output<framework::LoDTensor>(framework::GradVarName("W"));
    d_table->Resize(table->dims());
  }
 };
--- a/paddle/operators/mean_op.cc
+++ b/paddle/operators/mean_op.cc
@ -25,7 +25,7 @@ class MeanOp : public framework::OperatorWithKernel {
  void InferShape(const framework::InferShapeContext &ctx) const override {
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
                            "Input of MeanOp must be initialized.");
-    ctx.Output<Tensor>("Out")->Resize({1});
+    ctx.Output<framework::LoDTensor>("Out")->Resize({1});
  }
 };
@ -45,7 +45,7 @@ class MeanGradOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
-    ctx.Output<Tensor>(framework::GradVarName("X"))
+    ctx.Output<framework::LoDTensor>(framework::GradVarName("X"))
        ->Resize(ctx.Input<Tensor>("X")->dims());
  }
 };
--- a/paddle/operators/minus_op.cc
+++ b/paddle/operators/minus_op.cc
@ -33,7 +33,7 @@ class MinusOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE_EQ(
        left_tensor->numel(), right_tensor->numel(),
        "Minus operator must take two tensor with same num of elements");
-    ctx.Output<framework::Tensor>("Out")->Resize(left_tensor->dims());
+    ctx.Output<framework::LoDTensor>("Out")->Resize(left_tensor->dims());
  }
 };
--- a/paddle/operators/mul_op.cc
+++ b/paddle/operators/mul_op.cc
@ -18,6 +18,7 @@ namespace paddle {
 namespace operators {
 using framework::Tensor;
 using framework::LoDTensor;
 class MulOp : public framework::OperatorWithKernel {
 public:
@ -45,7 +46,8 @@ class MulOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE_EQ(
        x_mat_dims[1], y_mat_dims[0],
        "First matrix's width must be equal with second matrix's height.");
-    ctx.Output<Tensor>("Out")->Resize({x_mat_dims[0], y_mat_dims[1]});
+    ctx.Output<framework::LoDTensor>("Out")->Resize(
        {x_mat_dims[0], y_mat_dims[1]});
  }
 };
@ -94,8 +96,10 @@ class MulOpGrad : public framework::OperatorWithKernel {
    auto x_dims = ctx.Input<Tensor>("X")->dims();
    auto y_dims = ctx.Input<Tensor>("Y")->dims();
    auto out_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
-    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto *x_grad =
-    auto *y_grad = ctx.Output<Tensor>(framework::GradVarName("Y"));
+        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
    auto *y_grad =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("Y"));
    auto x_mat_dims =
        framework::flatten_to_2d(x_dims, Attr<int>("x_num_col_dims"));
--- a/paddle/operators/recurrent_op.cc
+++ b/paddle/operators/recurrent_op.cc
@ -26,10 +26,11 @@ namespace operators {
 using Scope = framework::Scope;
 using Variable = framework::Variable;
 using Tensor = framework::Tensor;
 using LoDTensor = framework::LoDTensor;
 void RecurrentAlgorithm::InferShape(const Scope& scope) const {
  seq_len_ = scope.FindVar((arg_->inlinks[0]).external)
-                 ->GetMutable<Tensor>()
+                 ->GetMutable<LoDTensor>()
                 ->dims()[0];
  CreateScopes(scope);
  auto step_scopes = GetStepScopes(scope);
@ -88,7 +89,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const {
        // the weight are located in parent scope
        for (auto& var_name : input.second) {
          if (!step_scope.FindVar(var_name)) {
-            step_scope.NewVar(var_name)->GetMutable<Tensor>();
+            step_scope.NewVar(var_name)->GetMutable<LoDTensor>();
          }
        }
      }
@ -106,11 +107,12 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const {
 void RecurrentAlgorithm::InitMemories(Scope* step_scope,
                                      bool infer_shape_mode) const {
  for (auto& attr : arg_->memories) {
-    Tensor* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable<Tensor>();
+    auto* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable<LoDTensor>();
    PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
                   "memory [%s]'s boot variable [%s] not exists", attr.var,
                   attr.boot_var);
-    Tensor* boot_mem = step_scope->FindVar(attr.boot_var)->GetMutable<Tensor>();
+    auto* boot_mem =
        step_scope->FindVar(attr.boot_var)->GetMutable<LoDTensor>();
    if (infer_shape_mode) {
      pre_mem->Resize(boot_mem->dims());
      PADDLE_ENFORCE_EQ(pre_mem->dims().size(), 2);
@ -192,9 +194,9 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients(
                   "memory variable [%s] does not exists", attr.var);
    PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
                   "boot variable [%s] does not exists", attr.boot_var);
-    Tensor* mem_grad = step_scope->NewVar(attr.var)->GetMutable<Tensor>();
+    auto* mem_grad = step_scope->NewVar(attr.var)->GetMutable<LoDTensor>();
-    Tensor* boot_mem_grad =
+    auto* boot_mem_grad =
-        step_scope->NewVar(attr.boot_var)->GetMutable<Tensor>();
+        step_scope->NewVar(attr.boot_var)->GetMutable<LoDTensor>();
    if (infer_shape_mode) {
      boot_mem_grad->Resize(mem_grad->dims());
    } else {
@ -205,7 +207,7 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients(
 void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const {
  seq_len_ = scope.FindVar((arg_->inlinks[0]).external)
-                 ->GetMutable<Tensor>()
+                 ->GetMutable<LoDTensor>()
                 ->dims()[0];
  auto step_scopes = GetStepScopes(scope);
  rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_,
--- a/paddle/operators/reshape_op.cc
+++ b/paddle/operators/reshape_op.cc
@ -46,7 +46,7 @@ class ReshapeOp : public framework::OperatorWithKernel {
    std::transform(shape.begin(), shape.end(), shape_int64.begin(),
                   [](int a) { return static_cast<int64_t>(a); });
    auto out_dims = framework::make_ddim(shape_int64);
-    ctx.Output<framework::Tensor>("Out")->Resize(out_dims);
+    ctx.Output<framework::LoDTensor>("Out")->Resize(out_dims);
  }
 };
@ -90,7 +90,7 @@ class ReshapeGradOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                            "Input(Out@GRAD) shouldn't be null.");
    auto dims = ctx.Input<framework::Tensor>("X")->dims();
-    auto *d_in = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
+    auto *d_in = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
    d_in->Resize(dims);
  }
 };
--- a/paddle/operators/rowwise_add_op.cc
+++ b/paddle/operators/rowwise_add_op.cc
@ -37,7 +37,7 @@ class RowwiseAddOp : public framework::OperatorWithKernel {
        framework::slice_ddim(x_dims, num_col_dims, x_dims.size()), b_dims,
        "The width of two operands must be same");
    PADDLE_ENFORCE_EQ(ctx.OutputSize("Out"), 1, "The output size must be 1");
-    ctx.Output<Tensor>("Out")->Resize(x_dims);
+    ctx.Output<framework::LoDTensor>("Out")->Resize(x_dims);
  }
 };
@ -76,8 +76,8 @@ class RowwiseAddGradOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE_EQ(
        framework::slice_ddim(x_dims, num_col_dims, x_dims.size()), b_dims,
        "The width of two operands must be same");
-    auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto *dx = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
-    auto *db = ctx.Output<Tensor>(framework::GradVarName("b"));
+    auto *db = ctx.Output<framework::LoDTensor>(framework::GradVarName("b"));
    if (dx) dx->Resize(x_dims);
    if (db) db->Resize(b_dims);
  }
--- a/paddle/operators/scale_op.cc
+++ b/paddle/operators/scale_op.cc
@ -28,7 +28,7 @@ class ScaleOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
    auto *in = ctx.Input<framework::Tensor>("X");
-    auto *out = ctx.Output<framework::Tensor>("Out");
+    auto *out = ctx.Output<framework::LoDTensor>("Out");
    out->Resize(in->dims());
  }
 };
--- a/paddle/operators/scatter_op.cc
+++ b/paddle/operators/scatter_op.cc
@ -35,7 +35,8 @@ class ScatterOp : public framework::OperatorWithKernel {
    framework::DDim data_dim(ctx.Input<Tensor>("Updates")->dims());
    for (int i = 1; i < data_dim.size(); ++i)
      PADDLE_ENFORCE_EQ(data_dim[i], ctx.Input<Tensor>("Updates")->dims()[i]);
-    ctx.Output<Tensor>("Out")->Resize(ctx.Input<Tensor>("Ref")->dims());
+    ctx.Output<framework::LoDTensor>("Out")->Resize(
        ctx.Input<Tensor>("Ref")->dims());
  }
 };
@ -45,9 +46,11 @@ class ScatterGradOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
-    auto *dUpdates = ctx.Output<Tensor>(framework::GradVarName("Updates"));
+    auto *dUpdates =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("Updates"));
    auto *Updates = ctx.Input<Tensor>("Updates");
-    auto *dRef = ctx.Output<Tensor>(framework::GradVarName("Ref"));
+    auto *dRef =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("Ref"));
    auto *Ref = ctx.Input<Tensor>("Ref");
    dRef->Resize(Ref->dims());
--- a/paddle/operators/sequence_avg_pool_op.cc
+++ b/paddle/operators/sequence_avg_pool_op.cc
@ -0,0 +1,90 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/operators/sequence_avg_pool_op.h"
 namespace paddle {
 namespace operators {
 // Forward operator of sequence_avg_pool. Only shape inference lives here;
 // the computation is done by the registered kernels.
 class SequenceAvgPoolOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
  // Resizes Out to the dims of X with the first dimension replaced by the
  // number of sequences described by the single LoD level of X.
  void InferShape(const framework::InferShapeContext& ctx) const override {
    // The two literals are concatenated, so the first one must end in a space.
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
                            "Input of SequenceAvgPoolOp "
                            "must be initialized.");
    auto* x = ctx.Input<framework::LoDTensor>("X");
    auto dims = x->dims();
    auto lod = x->lod();
    PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
    // lod[0] holds one more offset than there are sequences. Convert to a
    // signed value before subtracting so an empty level cannot wrap around.
    const int64_t batch_size = static_cast<int64_t>(lod[0].size()) - 1;
    PADDLE_ENFORCE_GE(
        dims[0], batch_size,
        "The first dimension of Input(X) must be large than batch size.");
    dims[0] = batch_size;
    ctx.Output<framework::LoDTensor>("Out")->Resize({dims});
  }
 };
 // Describes the interface of sequence_avg_pool: one input slot "X", one
 // output slot "Out", plus the operator's documentation string.
 class SequenceAvgPoolOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SequenceAvgPoolOpMaker(framework::OpProto* proto,
                         framework::OpAttrChecker* op_checker)
      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
    // Input slot.
    this->AddInput("X", "Input of SequenceAvgPoolOp.");
    // Output slot.
    this->AddOutput("Out", "The output of SequenceAvgPoolOp.");
    // Operator-level documentation.
    this->AddComment(R"DOC(
    SequenceAvgPoolOp averages features of all time-steps of each instance.
    More detailed comments will be added later.
    )DOC");
  }
 };
 // Backward operator of sequence_avg_pool. Only shape inference lives here;
 // the computation is done by the registered gradient kernels.
 class SequenceAvgPoolGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
  // Checks that Out@GRAD and X have the same rank and agree on every
  // dimension except the first, then resizes X@GRAD to the dims of X.
  void InferShape(const framework::InferShapeContext& ctx) const override {
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                            "Gradient of Out should not be null");
    // X is dereferenced below, so fail with a message instead of a crash.
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
                            "Input(X) should not be null");
    auto og_dims =
        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"))->dims();
    auto x_dims = ctx.Input<framework::LoDTensor>("X")->dims();
    PADDLE_ENFORCE_EQ(og_dims.size(), x_dims.size(),
                      "The rank of output grad must equal to Input(X).");
    // Dimension 0 is skipped: it is the number of sequences for Out@GRAD and
    // the number of rows for X. A signed index avoids a signed/unsigned
    // comparison against the rank.
    for (int i = 1; i < og_dims.size(); ++i) {
      PADDLE_ENFORCE_EQ(og_dims[i], x_dims[i], "The dimension mismatch.");
    }
    auto* x_grad =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
    x_grad->Resize(x_dims);
  }
 };
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 REGISTER_OP(sequence_avg_pool, ops::SequenceAvgPoolOp,
            ops::SequenceAvgPoolOpMaker, sequence_avg_pool_grad,
            ops::SequenceAvgPoolGradOp);
 REGISTER_OP_CPU_KERNEL(
    sequence_avg_pool,
    ops::SequenceAvgPoolKernel<paddle::platform::CPUPlace, float>);
 REGISTER_OP_CPU_KERNEL(
    sequence_avg_pool_grad,
    ops::SequenceAvgPoolGradKernel<paddle::platform::CPUPlace, float>);
--- a/paddle/operators/sequence_avg_pool_op.cu
+++ b/paddle/operators/sequence_avg_pool_op.cu
@ -0,0 +1,25 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
   http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */
 #define EIGEN_USE_GPU
 #include "paddle/operators/sequence_avg_pool_op.h"
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(
    sequence_avg_pool,
    ops::SequenceAvgPoolKernel<paddle::platform::GPUPlace, float>);
 REGISTER_OP_GPU_KERNEL(
    sequence_avg_pool_grad,
    ops::SequenceAvgPoolGradKernel<paddle::platform::GPUPlace, float>);
--- a/paddle/operators/sequence_avg_pool_op.h
+++ b/paddle/operators/sequence_avg_pool_op.h
@ -0,0 +1,81 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
 namespace paddle {
 namespace operators {
 using Tensor = framework::Tensor;
 using LoDTensor = framework::LoDTensor;
 template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 // Forward kernel of sequence_avg_pool: for every sequence described by the
 // first LoD level of X, writes the column-wise mean of that sequence's rows
 // into the corresponding row of Out.
 template <typename Place, typename T>
 class SequenceAvgPoolKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* in = context.Input<LoDTensor>("X");
    auto* out = context.Output<LoDTensor>("Out");
    auto dims = in->dims();
    auto lod = in->lod();
    // Number of elements per row of X.
    int64_t w = in->numel() / dims[0];
    out->mutable_data<T>(context.GetPlace());
    auto place = context.GetEigenDevice<Place>();
    // lod[0] holds one more offset than there are sequences. Compute the
    // count as a signed value so an empty level gives -1, not a huge bound.
    const int num_seq = static_cast<int>(lod[0].size()) - 1;
    for (int i = 0; i < num_seq; ++i) {
      Tensor in_t = in->Slice<T>(static_cast<int>(lod[0][i]),
                                 static_cast<int>(lod[0][i + 1]));
      Tensor out_t = out->Slice<T>(i, i + 1);
      // Number of rows (time-steps) in sequence i.
      int64_t h = static_cast<int64_t>(lod[0][i + 1] - lod[0][i]);
      auto in_e = EigenMatrix<T>::From(in_t, {h, w});
      // The output slice is a single row of w elements and the reduction
      // below is rank 1, so view the slice as a flat vector rather than as
      // an (h, w) matrix it does not have storage for.
      auto out_e = framework::EigenVector<T>::Flatten(out_t);
      out_e.device(place) = in_e.mean(Eigen::array<int, 1>({{0}}));
    }
  }
 };
 // Backward kernel of sequence_avg_pool: for every sequence described by the
 // first LoD level of X, spreads row i of Out@GRAD, divided by the sequence
 // length, over all rows of that sequence in X@GRAD.
 template <typename Place, typename T>
 class SequenceAvgPoolGradKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    // X is an input of the operator, so it is fetched with Input<>.
    auto* in = context.Input<LoDTensor>("X");
    auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
    auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
    auto dims = in->dims();
    auto lod = in->lod();
    // Number of elements per row of X.
    int64_t w = in->numel() / dims[0];
    in_g->mutable_data<T>(context.GetPlace());
    auto place = context.GetEigenDevice<Place>();
    // lod[0] holds one more offset than there are sequences. Compute the
    // count as a signed value so an empty level gives -1, not a huge bound.
    const int num_seq = static_cast<int>(lod[0].size()) - 1;
    for (int i = 0; i < num_seq; ++i) {
      auto in_g_t = in_g->Slice<T>(static_cast<int>(lod[0][i]),
                                   static_cast<int>(lod[0][i + 1]));
      auto out_g_t = out_g->Slice<T>(i, i + 1);
      // Number of rows (time-steps) in sequence i.
      int64_t h = static_cast<int64_t>(lod[0][i + 1] - lod[0][i]);
      auto in_g_e = EigenMatrix<T>::From(in_g_t, {h, w});
      auto out_g_e = EigenMatrix<T>::From(out_g_t, {1, w});
      // Repeat the (1, w) gradient row h times along dim 0 and once along
      // dim 1, so the result has the (h, w) shape of in_g_e.
      Eigen::DSizes<int, 2> bcast(h, 1);
      in_g_e.device(place) = (out_g_e / static_cast<T>(h)).broadcast(bcast);
    }
  }
 };
 }  // namespace operators
 }  // namespace paddle
--- a/paddle/operators/sgd_op.cc
+++ b/paddle/operators/sgd_op.cc
@ -23,10 +23,11 @@ class SGDOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE(
+    PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("param")->dims(),
-        ctx.Input<Tensor>("param")->dims() == ctx.Input<Tensor>("grad")->dims(),
+                      ctx.Input<Tensor>("grad")->dims(),
                      "Two input of SGD Op's dimension must be same.");
-    ctx.Output<Tensor>("param_out")->Resize(ctx.Input<Tensor>("param")->dims());
+    ctx.Output<framework::LoDTensor>("param_out")
        ->Resize(ctx.Input<Tensor>("param")->dims());
  }
 };
--- a/paddle/operators/sigmoid_op.cc
+++ b/paddle/operators/sigmoid_op.cc
@ -23,7 +23,8 @@ class SigmoidOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
-    ctx.Output<Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims());
+    ctx.Output<framework::LoDTensor>("Y")->Resize(
        ctx.Input<Tensor>("X")->dims());
  }
 };
@ -44,7 +45,7 @@ class SigmoidOpGrad : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
-    ctx.Output<Tensor>(framework::GradVarName("X"))
+    ctx.Output<framework::LoDTensor>(framework::GradVarName("X"))
        ->Resize(ctx.Input<Tensor>("Y")->dims());
  }
 };
--- a/Show More
+++ b/Show More