Merge branch 'develop' of https://github.com/paddlepaddle/paddle into const

8 years ago · eae5c94944
parent 9e904e5077 548b72a678
commit eae5c94944
35 changed files with 705 additions and 67 deletions
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@ -187,7 +187,13 @@ function(cc_library TARGET_NAME)
    endif()
    
    # cpplint code style
-    add_style_check_target(${TARGET_NAME} ${cc_library_SRCS})
+    foreach(source_file ${cc_library_SRCS})
+      string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
+      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
+        list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
+      endif()
+    endforeach()
+    add_style_check_target(${TARGET_NAME} ${cc_library_SRCS} ${cc_library_HEADERS})

  else(cc_library_SRCS)
    if (cc_library_DEPS)
@ -239,6 +245,14 @@ function(nv_library TARGET_NAME)
        add_dependencies(${TARGET_NAME} ${nv_library_DEPS})
        target_link_libraries(${TARGET_NAME} ${nv_library_DEPS})
      endif()
+      # cpplint code style (sources plus any same-named .h found next to them)
+      foreach(source_file ${nv_library_SRCS})
+        string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
+        if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
+          list(APPEND nv_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
+        endif()
+      endforeach()
+      add_style_check_target(${TARGET_NAME} ${nv_library_SRCS} ${nv_library_HEADERS})
    else(nv_library_SRCS)
      if (nv_library_DEPS)
        merge_static_libs(${TARGET_NAME} ${nv_library_DEPS})
--- a/paddle/framework/ddim.h
+++ b/paddle/framework/ddim.h
@ -25,18 +25,15 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

-namespace {
-typedef boost::variant<Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>, Dim<7>,
-                       Dim<8>, Dim<9>>
-    DDimVar;
-}
-
 /**
 * \brief A dynamically sized dimension.
 *
 * The number of dimensions must be between [1, 9].
 */
 struct DDim {
+  typedef boost::variant<Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>, Dim<7>,
+                         Dim<8>, Dim<9>>
+      DDimVar;
  DDimVar var;

  DDim() : var(Dim<1>()) {}
--- a/paddle/framework/grad_op_builder.cc
+++ b/paddle/framework/grad_op_builder.cc
@ -26,7 +26,7 @@ using VarIndexMap = std::unordered_map<std::string, int>;
 enum class OpArgType { IN, OUT };

 static std::vector<int>* GetOpFormat(OperatorBase* op, const OpArgType& type) {
-  std::string key = type == OpArgType::IN ? "input_format" : "output_name";
+  std::string key = type == OpArgType::IN ? "input_format" : "output_format";
  return op->attrs_.count(key)
             ? &boost::get<std::vector<int>>(op->attrs_.at(key))
             : nullptr;
@ -34,7 +34,7 @@ static std::vector<int>* GetOpFormat(OperatorBase* op, const OpArgType& type) {

 static const std::vector<int>* GetOpFormat(const OperatorBase* op,
                                           const OpArgType& type) {
-  std::string key = type == OpArgType::IN ? "input_format" : "output_name";
+  std::string key = type == OpArgType::IN ? "input_format" : "output_format";
  return op->attrs_.count(key)
             ? &boost::get<std::vector<int>>(op->attrs_.at(key))
             : nullptr;
@ -82,7 +82,7 @@ OperatorBase* BuildGradOp(const OperatorBase* op) {
  grad_op->attrs_ = op->attrs_;
  grad_op->attrs_.erase("input_format");
  grad_op->attrs_.erase("output_format");
-  if (GetOpFormat(op, OpArgType::OUT) != nullptr) {
+  if (GetOpFormat(op, OpArgType::IN) != nullptr) {
    grad_op->attrs_["output_format"] = std::vector<int>({0});
  }
  if (GetOpFormat(op, OpArgType::IN) != nullptr ||
--- a/paddle/framework/grad_op_builder.h
+++ b/paddle/framework/grad_op_builder.h
@ -1,3 +1,17 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
 #pragma once

 #include "paddle/framework/operator.h"
--- a/paddle/framework/grad_op_builder_test.cc
+++ b/paddle/framework/grad_op_builder_test.cc
@ -8,10 +8,49 @@ USE_OP(add_two);
 namespace paddle {
 namespace framework {

+class NOP : public OperatorBase {
+ public:
+  void InferShape(const Scope &scope) const override {}
+  void Run(const Scope &scope,
+           const platform::DeviceContext &dev_ctx) const override {}
+};
+
+class MutiInOutOpMaker : public OpProtoAndCheckerMaker {
+ public:
+  MutiInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("In1", "a single input");
+    AddInput("In2_mult", "a multiple input").SetMultiple();
+    AddInput("In3", "another single input");
+    AddOutput("Out1", "a single output");
+    AddOutput("Out2_mult", "a multiple output").SetMultiple();
+    AddComment("test op with multiple inputs and outputs");
+  }
+};
+
+class IOIgnoredOpMaker : public OpProtoAndCheckerMaker {
+ public:
+  IOIgnoredOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("In1", "a single input");
+    AddInput("In2_mult", "a multiple input").SetMultiple().IgnoreGradient();
+    AddInput("In3_mult", "another multiple input").SetMultiple();
+    AddOutput("Out1_mult", "a multiple output").SetMultiple();
+    AddOutput("Out2", "a single output").IgnoreGradient();
+    AddComment("op with inputs and outputs ignored in gradient calculating");
+  }
+};
+
+}  // namespace framework
+}  // namespace paddle
+
+namespace f = paddle::framework;
+
 TEST(GradOpBuilder, AddTwo) {
-  std::shared_ptr<OperatorBase> add_op(
-      OpRegistry::CreateOp("add_two", {"x", "y"}, {"out"}, {}));
-  std::shared_ptr<OperatorBase> grad_add_op = OpRegistry::CreateGradOp(*add_op);
+  std::shared_ptr<f::OperatorBase> add_op(
+      f::OpRegistry::CreateOp("add_two", {"x", "y"}, {"out"}, {}));
+  std::shared_ptr<f::OperatorBase> grad_add_op =
+      f::OpRegistry::CreateGradOp(*add_op);
  EXPECT_EQ(static_cast<int>(grad_add_op->inputs_.size()), 4);
  EXPECT_EQ(static_cast<int>(grad_add_op->outputs_.size()), 2);
  EXPECT_EQ(grad_add_op->Input("X"), "x");
@ -22,5 +61,85 @@ TEST(GradOpBuilder, AddTwo) {
  EXPECT_EQ(grad_add_op->Output("Y@GRAD"), "y@GRAD");
 }

-}  // namespace framework
-}  // namespace paddle
+REGISTER_OP(mult_io, f::NOP, f::MutiInOutOpMaker);
+REGISTER_GRADIENT_OP(mult_io, mult_io_grad, f::NOP);
+REGISTER_OP(io_ignored, f::NOP, f::IOIgnoredOpMaker);
+REGISTER_GRADIENT_OP(io_ignored, io_ignored_grad, f::NOP);
+
+TEST(GradOpBuilder, MutiInOut) {
+  f::AttributeMap attrs{{"input_format", std::vector<int>{0, 1, 4, 5}},
+                        {"output_format", std::vector<int>{0, 1, 3}}};
+  std::shared_ptr<f::OperatorBase> test_op(f::OpRegistry::CreateOp(
+      "mult_io", {"in1", "in2_1", "in2_2", "in2_3", "in3"},
+      {"out1", "out2_1", "out2_2"}, attrs));
+  std::shared_ptr<f::OperatorBase> grad_test_op =
+      f::OpRegistry::CreateGradOp(*test_op);
+
+  ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL + 3UL);
+  EXPECT_EQ(grad_test_op->Input("In1"), "in1");
+  EXPECT_EQ(grad_test_op->Inputs("In2_mult"),
+            std::vector<std::string>({"in2_1", "in2_2", "in2_3"}));
+  EXPECT_EQ(grad_test_op->Input("In3"), "in3");
+  EXPECT_EQ(grad_test_op->Input("Out1"), "out1");
+  EXPECT_EQ(grad_test_op->Inputs("Out2_mult"),
+            std::vector<std::string>({"out2_1", "out2_2"}));
+  EXPECT_EQ(grad_test_op->Input("Out1" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+            "out1" + f::OperatorBase::GRAD_VAR_SUFFIX());
+  EXPECT_EQ(
+      grad_test_op->Inputs("Out2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+      std::vector<std::string>(
+          {"out2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+           "out2_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
+
+  ASSERT_EQ(grad_test_op->outputs_.size(), 5UL);
+  EXPECT_EQ(grad_test_op->Output("In1" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+            "in1" + f::OperatorBase::GRAD_VAR_SUFFIX());
+  EXPECT_EQ(
+      grad_test_op->Outputs("In2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+      std::vector<std::string>({"in2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+                                "in2_2" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+                                "in2_3" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
+  EXPECT_EQ(grad_test_op->Output("In3" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+            "in3" + f::OperatorBase::GRAD_VAR_SUFFIX());
+}
+
+TEST(GradOpBuilder, IOIgnoredInGradient) {
+  f::AttributeMap attrs{{"input_format", std::vector<int>{0, 1, 3, 5}},
+                        {"output_format", std::vector<int>{0, 2, 3}}};
+  std::shared_ptr<f::OperatorBase> test_op(f::OpRegistry::CreateOp(
+      "io_ignored", {"in1", "in2_1", "in2_2", "in3_1", "in3_2"},
+      {"out1_1", "out1_2", "out2"}, attrs));
+  std::shared_ptr<f::OperatorBase> grad_test_op =
+      f::OpRegistry::CreateGradOp(*test_op);
+
+  // 'In2' and 'Out2' are ignored in gradient calculating
+  ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL + 3UL);
+  EXPECT_EQ(grad_test_op->Input("In1"), "in1");
+  EXPECT_EQ(grad_test_op->Inputs("In2_mult"),
+            std::vector<std::string>({f::OperatorBase::EMPTY_VAR_NAME(),
+                                      f::OperatorBase::EMPTY_VAR_NAME()}));
+  EXPECT_EQ(grad_test_op->Inputs("In3_mult"),
+            std::vector<std::string>({"in3_1", "in3_2"}));
+  EXPECT_EQ(grad_test_op->Inputs("Out1_mult"),
+            std::vector<std::string>({"out1_1", "out1_2"}));
+  EXPECT_EQ(grad_test_op->Input("Out2"), f::OperatorBase::EMPTY_VAR_NAME());
+  EXPECT_EQ(
+      grad_test_op->Inputs("Out1_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+      std::vector<std::string>(
+          {"out1_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+           "out1_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
+  EXPECT_EQ(grad_test_op->Input("Out2" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+            "out2" + f::OperatorBase::GRAD_VAR_SUFFIX());
+
+  ASSERT_EQ(grad_test_op->outputs_.size(), 5UL);
+  EXPECT_EQ(grad_test_op->Output("In1" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+            "in1" + f::OperatorBase::GRAD_VAR_SUFFIX());
+  EXPECT_EQ(
+      grad_test_op->Outputs("In2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+      std::vector<std::string>({"in2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+                                "in2_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
+  EXPECT_EQ(
+      grad_test_op->Outputs("In3_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+      std::vector<std::string>({"in3_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+                                "in3_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
+}
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@ -314,7 +314,7 @@ class OpRegistry {
  static std::unordered_map<std::string, OpProto>& protos() {
    static std::unordered_map<std::string, OpProto> protos_;
    return protos_;
-  };
+  }

  static std::unordered_map<std::string, std::string>& grad_ops() {
    static std::unordered_map<std::string, std::string> grad_ops_;
@ -336,7 +336,7 @@ class OpRegistry {
  static std::unordered_map<std::string, OpAttrChecker>& op_checkers() {
    static std::unordered_map<std::string, OpAttrChecker> op_checkers_;
    return op_checkers_;
-  };
+  }

  static void GenerateTempVariableName(OperatorBase* op) {
    static std::atomic<size_t> gUniqId(0UL);
@ -353,7 +353,7 @@ class OpRegistry {
 template <typename OpType, typename ProtoMakerType>
 class OpRegisterHelper {
 public:
-  OpRegisterHelper(const char* op_type) {
+  explicit OpRegisterHelper(const char* op_type) {
    OpRegistry::RegisterOp<OpType, ProtoMakerType>(op_type);
  }
 };
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@ -285,7 +285,7 @@ class OperatorWithKernel : public OperatorBase {
    platform::Place place_;

    OpKernelKey() = default;
-    OpKernelKey(const platform::DeviceContext& dev_ctx) {
+    explicit OpKernelKey(const platform::DeviceContext& dev_ctx) {
      place_ = dev_ctx.GetPlace();
    }

--- a/paddle/framework/pybind.cc
+++ b/paddle/framework/pybind.cc
@ -105,7 +105,16 @@ PYBIND11_PLUGIN(core) {
      .def("set", PyCUDATensorSetFromArray<float>)
      .def("set", PyCUDATensorSetFromArray<int>)
 #endif
-      .def("shape", [](Tensor &self) { return vectorize(self.dims()); });
+      .def("shape", [](Tensor &self) { return vectorize(self.dims()); })
+      .def("set_float_element",
+           [](Tensor &self, size_t offset, float f) {
+             // TODO(yuyang18): Only support CPU now. NOTE(review): comment
+             // said "GPU", but this indexes the buffer on the host -- confirm.
+             self.data<float>()[offset] = f;
+           })
+      .def("get_float_element", [](Tensor &self, size_t offset) -> float {
+        // TODO(yuyang18): Only support CPU now. NOTE(review): comment said
+        // "GPU", but this indexes the buffer on the host -- confirm.
+        return self.data<float>()[offset];
+      });

  py::class_<Variable>(m, "Variable", R"DOC(Variable Class.

--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
@ -967,8 +967,9 @@ void RecurrentGradientMachine::generateSequence() {
  size_t numSequences = getGenBatchSize();

  resizeBootFrame(numSequences);
-  // We create only two sub-network in generation for alternate use.
-  // Thus, we can reduce total memory of output_ in layer forward.
+  // We create only two sub-networks in generation: one stores the states of
+  // all layers at the previous time step and the other stores the states at
+  // the current time step.
  resizeOrCreateFrames(2);

  // outFrameLines_.size() > 1UL
@ -1001,10 +1002,9 @@ void RecurrentGradientMachine::generateSequence() {

  // init outArg
  size_t resultNum = generator_.config.num_results_per_sample();
-  IVector::resizeOrCreate(
-      generator_.outArg.ids,
-      generator_.config.max_num_frames() * numSequences * resultNum,
-      false);
+  size_t maxGenWordCount =
+      generator_.config.max_num_frames() * numSequences * resultNum;
+  IVector::resizeOrCreate(generator_.outArg.ids, maxGenWordCount, false);
  if (resultNum > 1) {
    CHECK_LE(resultNum, static_cast<size_t>(generator_.config.beam_size()));
    Matrix::resizeOrCreate(generator_.outArg.in,
@ -1012,6 +1012,11 @@ void RecurrentGradientMachine::generateSequence() {
                           /* width */ resultNum,
                           false,
                           /* useGpu */ false);
+    Matrix::resizeOrCreate(generator_.outArg.value,
+                           /* height */ maxGenWordCount,
+                           /* width */ 1,
+                           false,
+                           /* useGpu */ false);
  }
  ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions,
                                numSequences + 1,
@ -1313,13 +1318,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
  starts[0] = 0;
  if (numResults > 1) {
    real* probs = generator_.outArg.in->getData();
+    real* idsProb = generator_.outArg.value->getData();
+    size_t curPos = 0;
    for (size_t i = 0; i < finalPaths_.size(); ++i) {
      for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
        Path& path = finalPaths_[i][j];
-        generator_.ids.push_back(path.ids.size());  // sequence size
+        size_t genLen = path.ids.size();
+        generator_.ids.push_back(genLen);  // sequence size
        generator_.ids.insert(
            generator_.ids.end(), path.ids.begin(), path.ids.end());
        generator_.ids.push_back(-1);  // end of sequence
+
+        memcpy(idsProb + curPos, path.idsProb.data(), sizeof(real) * genLen);
+        curPos += genLen;
+        idsProb[curPos++] = -1.0;
        probs[i * numResults + j] = path.logProb;

        if (!j && dataArgsSize_) {
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
@ -189,6 +189,11 @@ public:
     */
    std::vector<int> ids;

+    /**
+     * @brief idsProb, log probability of each generated word.
+     */
+    std::vector<real> idsProb;
+
    /**
     * @brief logProb, current probability of path.
     */
@ -228,11 +233,13 @@ public:
     */
    Path(Path& old, int newId, real logProb, int machineId, int topIndex)
        : ids(old.ids),
+          idsProb(old.idsProb),
          logProb(old.logProb + logProb),
          machineId(machineId),
          topIndex(topIndex),
          seqId(old.seqId) {
      ids.push_back(newId);
+      idsProb.push_back(logProb);
      if (!old.probHistory.empty()) {
        this->probHistory = old.probHistory;
        // probHistory store current prob, not sum
@ -411,8 +418,9 @@ protected:

  struct Generator {
    GeneratorConfig config;
-    std::vector<int> ids;  // store generated sequences
-    Argument outArg;       // final output argument
+    std::vector<int> ids;       // store generated sequences
+    std::vector<real> idsProb;  // log probability of each generated word
+    Argument outArg;            // final output argument
  };
  bool generating_;
  Generator generator_;
--- a/paddle/gserver/tests/LayerGradUtil.cpp
+++ b/paddle/gserver/tests/LayerGradUtil.cpp
@ -400,7 +400,6 @@ void initDataLayer(TestConfig testConf,
        const std::vector<int>& labelSeqStartPositions =
            testConf.inputDefs[i].labelSeqStartPositions;
        if (labelSeqStartPositions.size() != 0) {
-          CHECK(!sequenceStartPositions);
          CHECK_GE(static_cast<int>(labelSeqStartPositions.size()), 2);

          sequenceStartPositions =
@ -410,6 +409,19 @@ void initDataLayer(TestConfig testConf,
                                           useGpu);
          data.sequenceStartPositions = sequenceStartPositions;
        }
+
+        const std::vector<int>& labelSubSeqStartPositions =
+            testConf.inputDefs[i].labelSubSeqStartPositions;
+        if (labelSubSeqStartPositions.size() != 0) {
+          CHECK_GE(static_cast<int>(labelSubSeqStartPositions.size()), 2);
+
+          subSequenceStartPositions =
+              ICpuGpuVector::create(labelSubSeqStartPositions.size(), useGpu);
+          subSequenceStartPositions->copyFrom(labelSubSeqStartPositions.data(),
+                                              labelSubSeqStartPositions.size(),
+                                              useGpu);
+          data.subSequenceStartPositions = subSequenceStartPositions;
+        }
        break;
      }
      default:
--- a/paddle/gserver/tests/LayerGradUtil.h
+++ b/paddle/gserver/tests/LayerGradUtil.h
@ -67,6 +67,7 @@ struct InputDef {
  bool isStatic;
  std::vector<int> labelInitValue;
  std::vector<int> labelSeqStartPositions;
+  std::vector<int> labelSubSeqStartPositions;
  MatrixPtr selfDefinedData;

  InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) {
@ -81,8 +82,10 @@ struct InputDef {
  InputDef(InputType type,
           string nameIn,
           MatrixPtr selfDefinedData,
-           std::vector<int> selfDefinedSeqStartPos = {})
+           std::vector<int> selfDefinedSeqStartPos = {},
+           std::vector<int> selfDefinedSubSeqStartPos = {})
      : labelSeqStartPositions(selfDefinedSeqStartPos),
+        labelSubSeqStartPositions(selfDefinedSubSeqStartPos),
        selfDefinedData(selfDefinedData) {
    inputType = type;
    name = nameIn;
--- a/paddle/memory/detail/buddy_allocator.h
+++ b/paddle/memory/detail/buddy_allocator.h
@ -39,7 +39,7 @@ class BuddyAllocator {

 public:
  void* Alloc(size_t unaligned_size);
-  void Free(void*);
+  void Free(void* ptr);
  size_t Used();

 public:
--- a/paddle/memory/detail/meta_cache.h
+++ b/paddle/memory/detail/meta_cache.h
@ -33,17 +33,17 @@ namespace detail {
 */
 class MetadataCache {
 public:
-  MetadataCache(bool uses_gpu);
+  explicit MetadataCache(bool uses_gpu);

 public:
  /*! \brief Load the associated metadata for the specified memory block. */
-  Metadata load(const MemoryBlock*);
+  Metadata load(const MemoryBlock* memory_block);

  /*! \brief Store the associated metadata for the specified memory block. */
-  void store(MemoryBlock*, const Metadata&);
+  void store(MemoryBlock* memory_block, const Metadata& meta_data);

  /*! \brief Indicate that the specified metadata will no longer be used. */
-  void invalidate(MemoryBlock*);
+  void invalidate(MemoryBlock* memory_block);

 public:
  MetadataCache(const MetadataCache&) = delete;
--- a/paddle/memory/memory.h
+++ b/paddle/memory/memory.h
@ -68,7 +68,7 @@ class PODDeleter {
  static_assert(std::is_pod<T>::value, "T must be POD");

 public:
-  PODDeleter(Place place) : place_(place) {}
+  explicit PODDeleter(Place place) : place_(place) {}
  void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); }

 private:
--- a/paddle/operators/add_op.cu
+++ b/paddle/operators/add_op.cu
@ -1,3 +1,17 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
 #define EIGEN_USE_GPU
 #include "paddle/framework/op_registry.h"
 #include "paddle/operators/add_op.h"
--- a/paddle/operators/cross_entropy_op.cu
+++ b/paddle/operators/cross_entropy_op.cu
@ -1,5 +1,19 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
 #define EIGEN_USE_GPU
 #include "paddle/operators/cross_entropy_op.h"

 REGISTER_OP_GPU_KERNEL(onehot_cross_entropy,
-                       ops::OnehotCrossEntropyOpKernel<ops::GPUPlace, float>);
+                       ops::OnehotCrossEntropyOpKernel<ops::GPUPlace, float>);
--- a/paddle/operators/fill_zeros_like_op.cu
+++ b/paddle/operators/fill_zeros_like_op.cu
@ -1,6 +1,20 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
 #include "paddle/framework/op_registry.h"
 #include "paddle/operators/fill_zeros_like_op.h"

 REGISTER_OP_GPU_KERNEL(
    fill_zeros_like,
-    paddle::operators::FillZerosLikeKernel<paddle::platform::GPUPlace, float>);
+    paddle::operators::FillZerosLikeKernel<paddle::platform::GPUPlace, float>);
--- a/paddle/operators/mean_op.cu
+++ b/paddle/operators/mean_op.cu
@ -1,6 +1,20 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
 #define EIGEN_USE_GPU

 #include "paddle/operators/mean_op.h"

 REGISTER_OP_GPU_KERNEL(mean, ops::MeanKernel<ops::GPUPlace, float>);
-REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel<ops::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel<ops::GPUPlace, float>);
--- a/paddle/operators/mul_op.cu
+++ b/paddle/operators/mul_op.cu
@ -15,4 +15,4 @@
 #define EIGEN_USE_GPU
 #include "paddle/operators/mul_op.h"

-REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<ops::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<ops::GPUPlace, float>);
--- a/paddle/operators/recurrent_op.h
+++ b/paddle/operators/recurrent_op.h
@ -19,7 +19,7 @@
 namespace paddle {
 namespace operators {

-using namespace paddle::framework;
+using namespace paddle::framework;  // NOLINT

 namespace rnn {

@ -94,7 +94,7 @@ void InitArgument(const ArgumentName& name, Argument* arg);
 };  // namespace rnn

 // The sequence format in RecurrentOp is Tensor<seq_len, batch_size, dim> now.
-// TODO:
+// TODO(Yan Chunwei):
 // 1. No-padding computing for sequences with indifinite length in one batch.
 // 2. Hierarchical RNN for sequence with sub-sequence.
 // 3. Internal Memory.
@ -172,12 +172,10 @@ public:
  /**
   * InferShape must be called before Run.
   */
-  virtual void InferShape(const Scope& scope) const override {
-    alg_.InferShape(scope);
-  }
+  void InferShape(const Scope& scope) const override { alg_.InferShape(scope); }

-  virtual void Run(const Scope& scope,
-                   const platform::DeviceContext& dev_ctx) const override {
+  void Run(const Scope& scope,
+           const platform::DeviceContext& dev_ctx) const override {
    alg_.Run(scope, dev_ctx);
  }

@ -194,12 +192,10 @@ public:
  /**
   * InferShape must be called before Run.
   */
-  virtual void InferShape(const Scope& scope) const override {
-    alg_.InferShape(scope);
-  }
+  void InferShape(const Scope& scope) const override { alg_.InferShape(scope); }

-  virtual void Run(const Scope& scope,
-                   const platform::DeviceContext& dev_ctx) const override {
+  void Run(const Scope& scope,
+           const platform::DeviceContext& dev_ctx) const override {
    alg_.Run(scope, dev_ctx);
  }

--- a/paddle/operators/rowwise_add_op.cu
+++ b/paddle/operators/rowwise_add_op.cu
@ -1,3 +1,17 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
 #define EIGEN_USE_GPU
 #include "paddle/operators/rowwise_add_op.h"

--- a/paddle/operators/sgd_op.cu
+++ b/paddle/operators/sgd_op.cu
@ -1,4 +1,18 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
 #define EIGEN_USE_GPU
 #include "paddle/operators/sgd_op.h"

-REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel<ops::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel<ops::GPUPlace, float>);
--- a/paddle/operators/sigmoid_op.cu
+++ b/paddle/operators/sigmoid_op.cu
@ -1,3 +1,17 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
 #define EIGEN_USE_GPU
 #include "paddle/operators/sigmoid_op.h"

--- a/paddle/operators/softmax_op.cu
+++ b/paddle/operators/softmax_op.cu
@ -1,6 +1,21 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
 #define EIGEN_USE_GPU
 #include "paddle/framework/op_registry.h"
 #include "paddle/operators/softmax_op.h"

 REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel<ops::GPUPlace, float>);
-REGISTER_OP_GPU_KERNEL(softmax_grad, ops::SoftmaxGradKernel<ops::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(softmax_grad,
+                       ops::SoftmaxGradKernel<ops::GPUPlace, float>);
--- a/Show More
+++ b/Show More