Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/pybind_for_protobuf_desc

update-doc-pybind
fengjiayi 8 years ago
commit 6db6475460

@@ -106,22 +106,22 @@ function(merge_static_libs TARGET_NAME)
   endforeach()
   list(REMOVE_DUPLICATES libs_deps)
 
-  if(APPLE) # Use OSX's libtool to merge archives
-    # To produce a library we need at least one source file.
-    # It is created by add_custom_command below and will helps
-    # also help to track dependencies.
-    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
+  # To produce a library we need at least one source file.
+  # It is created by add_custom_command below and will helps
+  # also help to track dependencies.
+  set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
 
+  if(APPLE) # Use OSX's libtool to merge archives
     # Make the generated dummy source file depended on all static input
     # libs. If input lib changes,the source file is touched
     # which causes the desired effect (relink).
-    add_custom_command(OUTPUT ${dummyfile}
-      COMMAND ${CMAKE_COMMAND} -E touch ${dummyfile}
+    add_custom_command(OUTPUT ${target_SRCS}
+      COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
       DEPENDS ${libs})
 
     # Generate dummy staic lib
-    file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
-    add_library(${TARGET_NAME} STATIC ${dummyfile})
+    file(WRITE ${target_SRCS} "const char *dummy = \"${target_SRCS}\";")
+    add_library(${TARGET_NAME} STATIC ${target_SRCS})
     target_link_libraries(${TARGET_NAME} ${libs_deps})
 
     foreach(lib ${libs})
@@ -130,11 +130,14 @@ function(merge_static_libs TARGET_NAME)
     endforeach()
 
     add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
      COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
-      COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles})
+      COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
+      )
   else() # general UNIX: use "ar" to extract objects and re-add to a common lib
+    set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)
+
     foreach(lib ${libs})
-      set(objlistfile ${lib}.objlist) # list of objects in the input library
-      set(objdir ${lib}.objdir)
+      set(objlistfile ${target_DIR}/${lib}.objlist) # list of objects in the input library
+      set(objdir ${target_DIR}/${lib}.objdir)
 
       add_custom_command(OUTPUT ${objdir}
         COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
@@ -142,31 +145,32 @@ function(merge_static_libs TARGET_NAME)
       add_custom_command(OUTPUT ${objlistfile}
         COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
-        COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ../${objlistfile}
+        COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ${objlistfile}
         DEPENDS ${lib} ${objdir}
         WORKING_DIRECTORY ${objdir})
 
-      # Empty dummy source file that goes into merged library
-      set(mergebase ${lib}.mergebase.c)
-      add_custom_command(OUTPUT ${mergebase}
-        COMMAND ${CMAKE_COMMAND} -E touch ${mergebase}
-        DEPENDS ${objlistfile})
-
-      list(APPEND mergebases "${mergebase}")
+      list(APPEND target_OBJS "${objlistfile}")
     endforeach()
 
-    add_library(${TARGET_NAME} STATIC ${mergebases})
+    # Make the generated dummy source file depended on all static input
+    # libs. If input lib changes,the source file is touched
+    # which causes the desired effect (relink).
+    add_custom_command(OUTPUT ${target_SRCS}
+      COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
+      DEPENDS ${libs} ${target_OBJS})
+
+    # Generate dummy staic lib
+    file(WRITE ${target_SRCS} "const char *dummy = \"${target_SRCS}\";")
+    add_library(${TARGET_NAME} STATIC ${target_SRCS})
     target_link_libraries(${TARGET_NAME} ${libs_deps})
 
     # Get the file name of the generated library
-    set(outlibfile "$<TARGET_FILE:${TARGET_NAME}>")
+    set(target_LIBNAME "$<TARGET_FILE:${TARGET_NAME}>")
 
-    foreach(lib ${libs})
-      add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
-        COMMAND ${CMAKE_AR} cr ${outlibfile} *.o
-        COMMAND ${CMAKE_RANLIB} ${outlibfile}
-        WORKING_DIRECTORY ${lib}.objdir)
-    endforeach()
+    add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
+      COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
+      COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
+      WORKING_DIRECTORY ${target_DIR})
   endif()
 endfunction(merge_static_libs)
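For reference, the reworked non-APPLE branch above boils down to a few shell steps: extract each input archive's objects into its own directory, then re-archive everything into the merged library. The following is only a sketch with hypothetical input archives liba.a and libb.a, not part of the commit:

.. code-block:: bash

    # Extract the objects of each input archive into its own directory and
    # record the member list (mirrors the `ar -x` / `ar -t` custom commands).
    mkdir -p merged.dir/liba.objdir merged.dir/libb.objdir
    (cd merged.dir/liba.objdir && ar -x ../../liba.a && ar -t ../../liba.a > ../liba.objlist)
    (cd merged.dir/libb.objdir && ar -x ../../libb.a && ar -t ../../libb.a > ../libb.objlist)

    # Re-add every extracted object into the merged archive and build its index
    # (mirrors the `ar crs` + ranlib POST_BUILD step).
    ar crs libmerged.a $(find merged.dir -name '*.o')
    ranlib libmerged.a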

@@ -158,17 +158,23 @@ PaddlePaddle uses the parameter name :code:`name` as the parameter ID; parameters with the same name
 Here :code:`hidden_a` and :code:`hidden_b` share the same parameter and bias, and both inputs of the softmax layer also use the same parameter :code:`softmax_param`.
 
-7. \*-cp27mu-linux_x86_64.whl is not a supported wheel on this platform.
+7. paddlepaddle\*.whl is not a supported wheel on this platform.
 ------------------------------------------------------------------------
 
-The main cause of this problem is that the :code:`wheel` package used when the wheel was built is up to date,
-while the :code:`pip` package on the system is older. The fix is to update the :code:`pip` package and rebuild PaddlePaddle.
+The main cause of this problem is that no paddlepaddle package matching the current system was found. The latest paddlepaddle Python packages support the Linux x86_64 and MacOS 10.12 operating systems with Python 2.7 and pip 9.0.1 installed.
 The way to update the :code:`pip` package is:
 
 .. code-block:: bash
 
     pip install --upgrade pip
 
+If that still does not work, run :code:`python -c "import pip; print(pip.pep425tags.get_supported())"` to list the package suffixes supported by the current system,
+and check whether they match the suffix of the package being installed.
+If the system supports :code:`linux_x86_64` while the package is :code:`manylinux1_x86_64`, upgrade pip to the latest version;
+if the system supports :code:`manylinux1_x86_64` while the (local) package is :code:`linux_x86_64`, you can rename the whl package to :code:`manylinux1_x86_64` and install it again.
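The check-and-rename workflow described in answer 7 can be sketched as a short shell session; the wheel file name below is only a hypothetical example, not part of the documentation change:

.. code-block:: bash

    # Inspect which platform tags this system's pip accepts (command from the FAQ above).
    python -c "import pip; print(pip.pep425tags.get_supported())"

    # Hypothetical example: the local wheel is tagged linux_x86_64 but the system
    # reports manylinux1_x86_64 support, so rename the file and install it again.
    mv paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl \
       paddlepaddle-0.10.0-cp27-cp27mu-manylinux1_x86_64.whl
    pip install paddlepaddle-0.10.0-cp27-cp27mu-manylinux1_x86_64.whl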
 8. None of the Python-related unit tests pass
 --------------------------------
 
@@ -310,7 +316,7 @@ The Paddle binary catches floating-point exceptions at runtime; whenever a floating-point exception
 * The model never converges and diverges to extremely large values.
 * The training data is problematic and the parameters converge to singular values; or the input data scale is too large, with some feature values reaching the millions, in which case matrix multiplication can overflow the floating-point range.
 
 The main remedies are to reduce the learning rate or to normalize the data.
 
 15. After building and installing, running import paddle.v2 as paddle raises ImportError: No module named v2
 ------------------------------------------------------------------------
@@ -373,3 +379,15 @@ The model parameter file saved by PaddlePaddle consists of a 16-byte header and the network parameters
     parameters = paddle.parameters.create(my_cost)
     parameters.set('emb', load_parameter(emb_param_file, 30000, 256))
 
+18. In multi-node cluster training, the logs contain only network-communication errors
+------------------------------
+
+In multi-node cluster training, the logged errors are network-communication errors such as :code:`Connection reset by peer`.
+Such errors are usually caused by a failure on one node that makes its training process exit, which in turn leaves the other nodes unable to connect. You can troubleshoot with the following steps:
+
+* Find the earliest error in :code:`train.log` and :code:`server.log`, and check whether it was triggered by some other problem (e.g. FPE, out of memory, out of disk space).
+* If the earliest error is itself a network-communication problem, it is very likely a port conflict caused by running in non-exclusive mode. Contact the cluster operators (OP) to check whether the current MPI cluster supports submitting with the resource=full parameter; if it does, submit with this parameter and change the job port.
+* If the current MPI cluster does not support exclusive task mode, ask the operators (OP) whether the job can be moved to another cluster or the current cluster can be upgraded.
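The first troubleshooting step above, locating the earliest error across the logs, can be sketched roughly as follows; the log file names follow the FAQ, while the grep patterns are only an assumption about what the error lines look like:

.. code-block:: bash

    # Print the first line in each log that looks like an error, with its line
    # number, to judge which node failed first (patterns are illustrative only).
    for f in train.log server.log; do
        echo "== $f =="
        grep -n -i -E 'error|fatal|killed|out of memory' "$f" | head -n 1
    done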

@@ -62,6 +62,7 @@ if(ANDROID)
           LIBRARY DESTINATION lib/${ANDROID_ABI})
   execute_process(
     COMMAND ${GIT_EXECUTABLE} log --pretty=oneline -1
+    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
     OUTPUT_VARIABLE GIT_COMMITS_LIST
     RESULT_VARIABLE GIT_COMMITS_LIST_RESULT
     ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
@@ -81,8 +82,7 @@ if(ANDROID)
     )"
   )
 else(ANDROID)
-  install(TARGETS paddle_capi_whole
-          ARCHIVE DESTINATION lib)
+  install(TARGETS paddle_capi_whole ARCHIVE DESTINATION lib)
   if(NOT IOS)
     install(TARGETS paddle_capi_shared DESTINATION lib)
   endif()

@@ -31,6 +31,10 @@ ProgramDesc& GetProgramDesc() {
   return *g_program_desc;
 }
 
+template <>
+AttrType AttrTypeID<bool>() {
+  return BOOLEAN;
+}
 template <>
 AttrType AttrTypeID<int>() {
   return INT;
@@ -44,6 +48,10 @@ AttrType AttrTypeID<std::string>() {
   return STRING;
 }
 template <>
+AttrType AttrTypeID<std::vector<bool>>() {
+  return BOOLEANS;
+}
+template <>
 AttrType AttrTypeID<std::vector<int>>() {
   return INTS;
 }
@@ -66,6 +74,9 @@ AttrType AttrTypeID<BlockDesc>() {
 Attribute GetAttrValue(const OpDesc::Attr& attr_desc) {
   switch (attr_desc.type()) {
+    case framework::AttrType::BOOLEAN: {
+      return attr_desc.b();
+    }
     case framework::AttrType::INT: {
       return attr_desc.i();
     }
@@ -75,6 +86,13 @@ Attribute GetAttrValue(const OpDesc::Attr& attr_desc) {
     case framework::AttrType::STRING: {
       return attr_desc.s();
     }
+    case framework::AttrType::BOOLEANS: {
+      std::vector<bool> val(attr_desc.bools_size());
+      for (int i = 0; i < attr_desc.bools_size(); ++i) {
+        val[i] = attr_desc.bools(i);
+      }
+      return val;
+    }
     case framework::AttrType::INTS: {
       std::vector<int> val(attr_desc.ints_size());
       for (int i = 0; i < attr_desc.ints_size(); ++i) {

@@ -27,8 +27,9 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-typedef boost::variant<boost::blank, int, float, std::string, std::vector<int>,
-                       std::vector<float>, std::vector<std::string>,
+typedef boost::variant<boost::blank, bool, int, float, std::string,
+                       std::vector<bool>, std::vector<int>, std::vector<float>,
+                       std::vector<std::string>,
                        std::vector<std::pair<int, int>>, BlockDesc*>
     Attribute;

@@ -166,9 +166,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
         // If part of input gradient of that operator is not calculated, fill
         // zero variables to that input gradient.
-        net->AppendOp(OpRegistry::CreateOp("fill_zeros_like",
-                                           {{"Src", {prefix}}},
-                                           {{"Dst", {grad_input}}}, {}));
+        net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", {{"X", {prefix}}},
+                                           {{"Y", {grad_input}}}, {}));
       }
       return false;
     });

@@ -127,8 +127,8 @@ class FillZeroOpMaker : public OpProtoAndCheckerMaker {
  public:
   FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("Src", "x");
-    AddOutput("Dst", "out");
+    AddInput("X", "x");
+    AddOutput("Y", "out");
     AddComment("");
   }
 };
@@ -325,10 +325,10 @@ TEST(Backward, op_part_of_output_are_not_need) {
   auto &fill_zero = *net->ops_[0];
   ASSERT_EQ("fill_zeros_like", fill_zero.Type());
-  ASSERT_EQ(1UL, fill_zero.Inputs("Src").size());
-  ASSERT_EQ("Z", fill_zero.Input("Src"));
-  ASSERT_EQ(1UL, fill_zero.Outputs("Dst").size());
-  ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Dst"));
+  ASSERT_EQ(1UL, fill_zero.Inputs("X").size());
+  ASSERT_EQ("Z", fill_zero.Input("X"));
+  ASSERT_EQ(1UL, fill_zero.Outputs("Y").size());
+  ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Y"));
 
   auto &d_many_out = *net->ops_[1];
   ASSERT_EQ("many_output_op_grad", d_many_out.Type());

@@ -23,7 +23,9 @@ enum AttrType {
   FLOATS = 4;
   STRINGS = 5;
   INT_PAIRS = 6;
-  BLOCK = 7;
+  BOOLEAN = 7;
+  BOOLEANS = 8;
+  BLOCK = 9;
 }
 
 message IntPair {
@@ -45,7 +47,9 @@ message OpDesc {
     repeated float floats = 7;
     repeated string strings = 8;
     repeated IntPair int_pairs = 9;
-    optional int32 block_idx = 10;
+    optional bool b = 10;
+    repeated bool bools = 11;
+    optional int32 block_idx = 12;
   };
 
   message Var {

@@ -207,23 +207,22 @@ const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
 }
 
 template <>
-Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
-  auto* var = OutputVar(name);
-  return var == nullptr ? nullptr : const_cast<Tensor*>(GetTensorFromVar(var));
+Tensor* InferShapeContext::Output<Tensor>(const std::string& name) const {
+  auto var = OutputVar(name);
+  return var == nullptr ? nullptr : var->GetMutable<LoDTensor>();
 }
 
 template <>
-std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
+std::vector<Tensor*> InferShapeContext::MultiOutput<Tensor>(
     const std::string& name) const {
   auto names = op().Outputs(name);
   std::vector<Tensor*> res;
   res.reserve(names.size());
   std::transform(names.begin(), names.end(), std::back_inserter(res),
                  [&](const std::string& sub_name) {
-                   auto var = scope().FindVar(sub_name);
-                   return var == nullptr
-                              ? nullptr
-                              : const_cast<Tensor*>(GetTensorFromVar(var));
+                   auto var = scope_.FindVar(sub_name);
+                   return var == nullptr ? nullptr
+                                         : var->GetMutable<LoDTensor>();
                  });
   return res;
 }

@@ -212,9 +212,9 @@ class InferShapeContext {
     return res;
   }
 
-  std::vector<const Variable*> MultiOutputVar(const std::string& name) const {
+  std::vector<Variable*> MultiOutputVar(const std::string& name) const {
     auto names = op_.Outputs(name);
-    std::vector<const Variable*> res;
+    std::vector<Variable*> res;
     res.reserve(names.size());
     std::transform(names.begin(), names.end(), std::back_inserter(res),
                    [this](const std::string& name) {
@@ -271,6 +271,20 @@ class InferShapeContext {
     return &var->Get<Tensor>();
   }
 
+  void ShareLoD(const std::string& in, const std::string& out, size_t i = 0,
+                size_t j = 0) const {
+    PADDLE_ENFORCE_LT(i, InputSize(in));
+    PADDLE_ENFORCE_LT(j, OutputSize(out));
+    auto* in_var = MultiInputVar(in)[i];
+    auto* out_var = MultiOutputVar(out)[j];
+    if (!in_var->IsType<LoDTensor>()) return;
+    PADDLE_ENFORCE(out_var->IsType<LoDTensor>(),
+                   "The %d-th output of Output(%s) must be LoDTensor.", j, out);
+    auto in_tensor = in_var->Get<LoDTensor>();
+    auto* out_tensor = out_var->GetMutable<LoDTensor>();
+    out_tensor->set_lod(in_tensor.lod());
+  }
+
  private:
   const OperatorBase& op_;
   const Scope& scope_;
@@ -283,6 +297,13 @@ template <>
 const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
     const std::string& name) const;
 
+template <>
+Tensor* InferShapeContext::Output<Tensor>(const std::string& name) const;
+
+template <>
+std::vector<Tensor*> InferShapeContext::MultiOutput<Tensor>(
+    const std::string& name) const;
+
 template <typename T>
 struct EigenDeviceConverter;
@@ -315,38 +336,10 @@ class ExecutionContext : public InferShapeContext {
     return device_context_;
   }
 
-  // redefine Output function,
-  // use Variable::Get instead of Variable::GetMutable
-  template <typename T>
-  T* Output(const std::string& name) const {
-    auto var = OutputVar(name);
-    return var == nullptr ? nullptr : const_cast<T*>(&var->Get<T>());
-  }
-
-  // redefine MultiOutput function.
-  // use Variable::Get instead of Variable::GetMutable
-  template <typename T>
-  std::vector<T*> MultiOutput(const std::string& name) const {
-    auto names = op().Outputs(name);
-    std::vector<T*> res;
-    res.reserve(names.size());
-    std::transform(
-        names.begin(), names.end(), std::back_inserter(res),
-        [&](const std::string& sub_name) { return Output<T>(sub_name); });
-    return res;
-  }
-
  private:
   const platform::DeviceContext& device_context_;
 };
 
-template <>
-Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const;
-
-template <>
-std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
-    const std::string& name) const;
-
 class OpKernel {
  public:
   /**

@@ -165,12 +165,6 @@ class Tensor {
   /*! points to dimensions of memory block. */
   DDim dims_;
 
-  /**
-   * A cache of the number of elements in a tensor.
-   * Would be 0 for an uninitialized tensor.
-   */
-  int64_t numel_;
-
   /**
    * @brief A PlaceHolder may be shared by more than one tensor.
    *

@@ -147,13 +147,12 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
 
 inline Tensor& Tensor::Resize(const DDim& dims) {
   dims_ = dims;
-  numel_ = product(dims_);
   return *this;
 }
 
 inline const DDim& Tensor::dims() const { return dims_; }
 
-inline int64_t Tensor::numel() const { return numel_; }
+inline int64_t Tensor::numel() const { return product(dims_); }
 
 template <typename T>
 inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {

@@ -39,7 +39,8 @@ class AccuracyOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(inference->dims()[0], label->dims()[0],
                       "inference size must be the same as label size");
 
-    ctx.Output<framework::LoDTensor>("Accuracy")->Resize({1});
+    ctx.Output<framework::Tensor>("Accuracy")->Resize({1});
+    ctx.ShareLoD("Inference", /*->*/ "Accuracy");
   }
 };
@@ -54,11 +55,15 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
     // TODO(typhoonzero): AddInput("Weight", ...
     AddOutput("Accuracy", "The accuracy of current batch");
 
-    AddComment(
-        R"DOC(Accuracy. It will print accuracy rate for classification.
+    AddComment(R"DOC(
+Accuracy. It will print accuracy rate for classification.
 The accuracy is:
 
 .. math::
-accuracy = \\frac{NumOfCorrectPredicts}{NumOfAllSamples})DOC");
+accuracy = \\frac{NumOfCorrectPredicts}{NumOfAllSamples})
+
+Both the input `Inference` and `Label` can carry the LoD (Level of Details)
+information, or not. But the output only shares the LoD with input `Inference`.
+)DOC");
   }
 };

@@ -23,8 +23,9 @@ class ActivationOp : public framework::OperatorWithKernel {
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    ctx.Output<framework::LoDTensor>("Y")->Resize(
+    ctx.Output<framework::Tensor>("Y")->Resize(
         ctx.Input<framework::Tensor>("X")->dims());
+    ctx.ShareLoD("X", /*->*/ "Y");
   }
 };
@@ -34,7 +35,7 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    ctx.Output<framework::LoDTensor>(framework::GradVarName("X"))
+    ctx.Output<framework::Tensor>(framework::GradVarName("X"))
         ->Resize(ctx.Input<framework::Tensor>("Y")->dims());
   }
 };

@@ -33,7 +33,7 @@ class AddOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("X")->dims(),
                       ctx.Input<Tensor>("Y")->dims(),
                       "Two input of Add Op's dimension must be same.");
-    ctx.Output<framework::LoDTensor>("Out")->Resize(
+    ctx.Output<framework::Tensor>("Out")->Resize(
         ctx.Input<Tensor>("X")->dims());
   }
 };

@@ -17,8 +17,6 @@
 namespace paddle {
 namespace operators {
 
-using framework::LoDTensor;
-
 class ClipOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
@@ -29,11 +27,12 @@ class ClipOp : public framework::OperatorWithKernel {
                             "Input(X) of ClipOp should not be null.");
     PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"),
                             "Output(Out) of ClipOp should not be null.");
-    auto x_dims = ctx.Input<LoDTensor>("X")->dims();
+    auto x_dims = ctx.Input<Tensor>("X")->dims();
     auto max = Attr<float>("max");
     auto min = Attr<float>("min");
     PADDLE_ENFORCE_LT(min, max, "max should be greater than min.");
-    ctx.Output<LoDTensor>("Out")->Resize(x_dims);
+    ctx.Output<Tensor>("Out")->Resize(x_dims);
+    ctx.ShareLoD("X", /*->*/ "Out");
   }
 };
@@ -66,8 +65,8 @@ class ClipOpGrad : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                             "Input(Out@GRAD) should not be null");
-    auto x_dims = ctx.Input<LoDTensor>("X")->dims();
-    auto *x_grad = ctx.Output<LoDTensor>(framework::GradVarName("X"));
+    auto x_dims = ctx.Input<Tensor>("X")->dims();
+    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
     if (x_grad != nullptr) {
       x_grad->Resize(x_dims);
     }

@@ -29,7 +29,7 @@ class ConcatOp : public framework::OperatorWithKernel {
                             "Output(Out) of ConcatOp should not be null.");
 
     auto ins = ctx.MultiInput<framework::Tensor>("X");
-    auto *out = ctx.Output<framework::LoDTensor>("Out");
+    auto *out = ctx.Output<framework::Tensor>("Out");
     size_t axis = static_cast<size_t>(ctx.Attr<int>("axis"));
     size_t n = ins.size();

@@ -37,7 +37,7 @@ class Conv2DOp : public framework::OperatorWithKernel {
     auto in = ctx.Input<Tensor>("Input");
     auto filter = ctx.Input<Tensor>("Filter");
-    auto out = ctx.Output<framework::LoDTensor>("Output");
+    auto out = ctx.Output<framework::Tensor>("Output");
     std::vector<int> strides = Attr<std::vector<int>>("strides");
     std::vector<int> paddings = Attr<std::vector<int>>("paddings");
     int groups = Attr<int>("groups");
@@ -111,10 +111,9 @@ class Conv2DOpGrad : public framework::OperatorWithKernel {
   void InferShape(const framework::InferShapeContext &ctx) const override {
     auto in = ctx.Input<Tensor>("Input");
     auto filter = ctx.Input<Tensor>("Filter");
-    auto d_in =
-        ctx.Output<framework::LoDTensor>(framework::GradVarName("Input"));
+    auto d_in = ctx.Output<framework::Tensor>(framework::GradVarName("Input"));
     auto d_filter =
-        ctx.Output<framework::LoDTensor>(framework::GradVarName("Filter"));
+        ctx.Output<framework::Tensor>(framework::GradVarName("Filter"));
     if (d_in) d_in->Resize(in->dims());
     if (d_filter) d_filter->Resize(filter->dims());
   }

@@ -54,9 +54,10 @@ class CosSimOp : public framework::OperatorWithKernel {
                    " just 1 (which will be broadcasted to match Input(X)).");
 
     // resize tensor
-    ctx.Output<framework::LoDTensor>("Out")->Resize({x_dims[0], 1});
-    ctx.Output<framework::LoDTensor>("XNorm")->Resize({x_dims[0], 1});
-    ctx.Output<framework::LoDTensor>("YNorm")->Resize({y_dims[0], 1});
+    ctx.Output<framework::Tensor>("Out")->Resize({x_dims[0], 1});
+    ctx.Output<framework::Tensor>("XNorm")->Resize({x_dims[0], 1});
+    ctx.Output<framework::Tensor>("YNorm")->Resize({y_dims[0], 1});
+    ctx.ShareLoD("X", /*->*/ "Out");
   }
 };
@@ -81,10 +82,13 @@ Cosine Similarity Operator.
 The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)).
 
-Input(X) and Input(Y) must have the same shape, except that the 1st dimension
-of Input(Y) could be just 1 (different from Input(X)), which will be
-broadcasted to match the shape of Input(X) before computing their cosine
+The input `X` and `Y` must have the same shape, except that the 1st dimension
+of input `Y` could be just 1 (different from input `X`), which will be
+broadcasted to match the shape of input `X` before computing their cosine
 similarity.
+
+Both the input `X` and `Y` can carry the LoD (Level of Details) information,
+or not. But the output only shares the LoD with input `X`.
 )DOC");
   }
 };
@@ -139,10 +143,8 @@ class CosSimOpGrad : public framework::OperatorWithKernel {
                       "Shape of Input(Out@Grad) must be [X.Dim(0), 1].");
 
     // resize tensor
-    auto *x_grad =
-        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
-    auto *y_grad =
-        ctx.Output<framework::LoDTensor>(framework::GradVarName("Y"));
+    auto *x_grad = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
+    auto *y_grad = ctx.Output<framework::Tensor>(framework::GradVarName("Y"));
     if (x_grad) x_grad->Resize(x_dims);
     if (y_grad) y_grad->Resize(y_dims);
   }

@@ -19,7 +19,6 @@ namespace paddle {
 namespace operators {
 
 using framework::Tensor;
-using framework::LoDTensor;
 
 class CropOp : public framework::OperatorWithKernel {
  public:
@@ -31,9 +30,9 @@ class CropOp : public framework::OperatorWithKernel {
                             "Input(X) of CropOp should not be null.");
     PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"),
                             "Output(Out) of CropOp should not be null.");
-    auto x_dim = ctx.Input<LoDTensor>("X")->dims();
-    auto *y = ctx.Input<LoDTensor>("Y");
-    auto *out = ctx.Output<LoDTensor>("Out");
+    auto x_dim = ctx.Input<Tensor>("X")->dims();
+    auto *y = ctx.Input<Tensor>("Y");
+    auto *out = ctx.Output<Tensor>("Out");
     if (y == nullptr) {
       auto shape = Attr<std::vector<int>>("shape");
       PADDLE_ENFORCE_EQ(
@@ -121,8 +120,8 @@ class CropOpGrad : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                             "Input(Out@GRAD) should not be null");
-    auto x_dims = ctx.Input<LoDTensor>("X")->dims();
-    auto *x_grad = ctx.Output<LoDTensor>(framework::GradVarName("X"));
+    auto x_dims = ctx.Input<Tensor>("X")->dims();
+    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
     if (x_grad != nullptr) {
       x_grad->Resize(x_dims);
     }

@@ -17,8 +17,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using framework::LoDTensor;
-
 class CrossEntropyOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
@@ -35,23 +33,21 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(x->dims().size(), 2, "Input(X)'s rank must be 2.");
     PADDLE_ENFORCE_EQ(label->dims().size(), 2,
                       "Input(Label)'s rank must be 2.");
-    // TODO(xinghai-sun): remove this check after swtiching to bool
-    PADDLE_ENFORCE(ctx.Attr<int>("soft_label") == 0 ||
-                   ctx.Attr<int>("soft_label") == 1);
     PADDLE_ENFORCE_EQ(x->dims()[0], label->dims()[0],
                       "The 1st dimension of Input(X) and Input(Label) must "
                       "be equal.");
-    if (ctx.Attr<int>("soft_label") == 1) {
+    if (ctx.Attr<bool>("soft_label")) {
       PADDLE_ENFORCE_EQ(x->dims()[1], label->dims()[1],
-                        "If Attr(soft_label) == 1, The 2nd dimension of "
+                        "If Attr(soft_label) == true, The 2nd dimension of "
                         "Input(X) and Input(Label) must be equal.");
     } else {
       PADDLE_ENFORCE_EQ(label->dims()[1], 1,
-                        "If Attr(soft_label) == 0, The 2nd dimension of "
+                        "If Attr(soft_label) == false, The 2nd dimension of "
                         "Input(Label) must be 1.");
     }
 
-    ctx.Output<LoDTensor>("Y")->Resize({x->dims()[0], 1});
+    ctx.Output<Tensor>("Y")->Resize({x->dims()[0], 1});
+    ctx.ShareLoD("X", /*->*/ "Y");
   }
 };
@@ -74,9 +70,6 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(dy->dims().size(), 2, "Input(Y@Grad)'s rank must be 2.");
     PADDLE_ENFORCE_EQ(label->dims().size(), 2,
                       "Input(Label)'s rank must be 2.");
-    // TODO(xinghai-sun): remove this check after swtiching to bool
-    PADDLE_ENFORCE(ctx.Attr<int>("soft_label") == 0 ||
-                   ctx.Attr<int>("soft_label") == 1);
     PADDLE_ENFORCE_EQ(x->dims()[0], label->dims()[0],
                       "The 1st dimension of Input(X) and Input(Label) must "
                       "be equal.");
@@ -85,17 +78,17 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
                       "be equal.");
     PADDLE_ENFORCE_EQ(dy->dims()[1], 1,
                       "The 2nd dimension of Input(Y@Grad) must be 1.");
-    if (ctx.Attr<int>("soft_label") == 1) {
+    if (ctx.Attr<bool>("soft_label")) {
       PADDLE_ENFORCE_EQ(x->dims()[1], label->dims()[1],
-                        "If Attr(soft_label) == 1, The 2nd dimension of "
+                        "If Attr(soft_label) == true, The 2nd dimension of "
                         "Input(X) and Input(Label) must be equal.");
     } else {
       PADDLE_ENFORCE_EQ(label->dims()[1], 1,
-                        "If Attr(soft_label) == 0, The 2nd dimension of "
+                        "If Attr(soft_label) == false, The 2nd dimension of "
                         "Input(Label) must be 1.");
     }
-    auto dx = ctx.Output<LoDTensor>(framework::GradVarName("X"));
+    auto dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     dx->Resize(x->dims());
   }
 };
@@ -108,7 +101,8 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X", "The first input of CrossEntropyOp");
     AddInput("Label", "The second input of CrossEntropyOp");
     AddOutput("Y", "The output of CrossEntropyOp");
-    AddAttr<int>("soft_label", "Is soft label. Default zero.").SetDefault(0);
+    AddAttr<bool>("soft_label", "Is soft label. Default zero.")
+        .SetDefault(false);
 
     AddComment(R"DOC(
 CrossEntropy Operator.
@@ -116,12 +110,12 @@ CrossEntropy Operator.
 It supports both standard cross-entropy and soft-label cross-entropy loss
 computation.
 1) One-hot cross-entropy:
-    soft_label = 0, Label[i, 0] indicates the class index for sample i:
+    soft_label = False, Label[i, 0] indicates the class index for sample i:
 
                 Y[i] = -log(X[i, Label[i]])
 
 2) Soft-label cross-entropy:
-    soft_label = 1, Label[i, j] indicates the soft label of class j
+    soft_label = True, Label[i, j] indicates the soft label of class j
     for sample i:
 
                 Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
@@ -133,6 +127,9 @@ computation.
 As a special case of 2), when each row of Input(Label) has only one
 non-zero element (equals 1), soft-label cross-entropy degenerates to a
 one-hot cross-entropy with one-hot label representation.
+
+Both the input `X` and `Label` can carry the LoD (Level of Details) information,
+or not. But the output only shares the LoD with input `X`.
 )DOC");
   }
 };

@@ -102,7 +102,7 @@ class CrossEntropyOpCUDAKernel : public framework::OpKernel {
     int grid = (n + block - 1) / block;
     // TODO(qingqing) launch kernel on specified stream
     // base on ExecutionContext.
-    if (ctx.Attr<int>("soft_label") == 1) {
+    if (ctx.Attr<bool>("soft_label")) {
       auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
       SoftCrossEntropyKernel<T><<<grid, block>>>(y_data, x_data, label_data, n,
                                                  d);
@@ -137,7 +137,7 @@ class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel {
     grid = (n + block - 1) / block;
     // TODO(qingqing): launch kernel on specified stream
     // base on ExecutionContext.
-    if (ctx.Attr<int>("soft_label") == 1) {
+    if (ctx.Attr<bool>("soft_label")) {
       auto* label_data = label->data<T>();
       SoftCrossEntropyGradientKernel<T><<<grid, block>>>(
           dx_data, dy_data, x_data, label_data, n, d);

@@ -51,7 +51,7 @@ class CrossEntropyOpKernel : public framework::OpKernel {
     int batch_size = x->dims()[0];
     int class_num = x->dims()[1];
 
-    if (ctx.Attr<int>("soft_label") == 1) {
+    if (ctx.Attr<bool>("soft_label")) {
       auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
       int index = 0;
       for (int i = 0; i < batch_size; ++i) {
@@ -92,7 +92,7 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel {
     int class_num = x->dims()[1];
 
     // TODO(qingqing): make zero setting an common function.
-    if (ctx.Attr<int>("soft_label") == 1) {
+    if (ctx.Attr<bool>("soft_label")) {
       auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
       int index = 0;
       for (int i = 0; i < batch_size; ++i) {

@@ -18,7 +18,6 @@ namespace paddle {
 namespace operators {
 
 using framework::Tensor;
-using framework::LoDTensor;
 
 class DropoutOp : public framework::OperatorWithKernel {
  public:
@@ -29,15 +28,13 @@ class DropoutOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
     PADDLE_ENFORCE_GE(ctx.Attr<float>("dropout_prob"), 0);
     PADDLE_ENFORCE_LE(ctx.Attr<float>("dropout_prob"), 1);
-    // TODO(xinghai-sun): remove this check after swtiching to bool
-    PADDLE_ENFORCE(ctx.Attr<int>("is_training") == 0 ||
-                   ctx.Attr<int>("is_training") == 1);
     auto dims = ctx.Input<Tensor>("X")->dims();
-    ctx.Output<LoDTensor>("Out")->Resize(dims);
-    if (ctx.Attr<int>("is_training") == 1) {
-      ctx.Output<LoDTensor>("Mask")->Resize(dims);
+    ctx.Output<Tensor>("Out")->Resize(dims);
+    if (ctx.Attr<bool>("is_training")) {
+      ctx.Output<Tensor>("Mask")->Resize(dims);
     }
+    ctx.ShareLoD("X", /*->*/ "Out");
   }
 };
@@ -49,8 +46,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddAttr<AttrType>("dropout_prob", "Probability of setting units to zero.")
         .SetDefault(.5f);
-    // TODO(xinghai-sun): use bool for is_training after bool is supported.
-    AddAttr<int>("is_training", "Whether in training phase.").SetDefault(1);
+    AddAttr<bool>("is_training", "Whether in training phase.").SetDefault(true);
     AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
     AddInput("X", "The input of dropout op.");
     AddOutput("Out", "The output of dropout op.");
@@ -59,7 +55,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 Dropout Operator.
 
-"Dropout" refers to randomly dropping out units in a nerual network. It is a
+'Dropout' refers to randomly dropping out units in a nerual network. It is a
 regularization technique for reducing overfitting by preventing neuron
 co-adaption during training. The dropout operator randomly set (according to
 the given dropout probability) the outputs of some units to zero, while others
@@ -75,7 +71,7 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE_EQ(ctx.Attr<int>("is_training"), 1,
+    PADDLE_ENFORCE(ctx.Attr<bool>("is_training"),
                    "GradOp is only callable when is_training is true");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
@@ -85,9 +81,6 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_GE(ctx.Attr<AttrType>("dropout_prob"), 0);
     PADDLE_ENFORCE_LE(ctx.Attr<AttrType>("dropout_prob"), 1);
-    // TODO(xinghai-sun): remove this check after swtiching to bool
-    PADDLE_ENFORCE(ctx.Attr<int>("is_training") == 0 ||
-                   ctx.Attr<int>("is_training") == 1);
     auto x_dims = ctx.Input<Tensor>("X")->dims();
     auto out_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
     PADDLE_ENFORCE_EQ(x_dims, out_dims,
@@ -96,7 +89,7 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(x_dims, mask_dims,
                       "Dimensions of Input(X) and Mask must be the same.");
 
-    auto *x_grad = ctx.Output<LoDTensor>(framework::GradVarName("X"));
+    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
     x_grad->Resize(x_dims);
   }
 };

@@ -59,7 +59,7 @@ class GPUDropoutKernel : public framework::OpKernel {
     auto Y = EigenMatrix<T>::Reshape(*y, 1);
 
     auto place = context.GetEigenDevice<Place>();
-    if (context.Attr<int>("is_training") == 1) {
+    if (context.Attr<bool>("is_training")) {
       auto* mask = context.Output<Tensor>("Mask");
       auto* mask_data = mask->mutable_data<T>(context.GetPlace());
       int size = framework::product(mask->dims());

Some files were not shown because too many files have changed in this diff.
