Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop

8 years ago · 147c3f523a
parent 63912dcc19 7506e4816b
commit 147c3f523a
106 changed files with 3212 additions and 198 deletions
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@ -19,7 +19,7 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope)
 proto_library(framework_proto SRCS framework.proto)

 cc_library(attribute SRCS attribute.cc DEPS framework_proto)
-cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute)
+cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim)
 cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
 cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
 cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto proto_desc)
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@ -302,7 +302,7 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
    return grad_op_descs;  // empty vector
  }

-  grad_op_descs = OpRegistry::CreateGradOpDescs(*op_desc);
+  grad_op_descs = OpRegistry::CreateGradOpDescs(op_desc.get());

  std::list<std::unique_ptr<OpDescBind>> pending_fill_zeros_ops;
  for (auto& desc : grad_op_descs) {
--- a/paddle/framework/backward_test.cc
+++ b/paddle/framework/backward_test.cc
@ -58,6 +58,8 @@ class MulOpMaker : public OpProtoAndCheckerMaker {
    AddInput("X", "A");
    AddInput("Y", "B");
    AddOutput("Out", "Out");
+    AddAttr<int>("x_num_col_dims", "").SetDefault(1).EqualGreaterThan(1);
+    AddAttr<int>("y_num_col_dims", "").SetDefault(1).EqualGreaterThan(1);
    AddComment("Mul");
  }
 };
@ -440,6 +442,28 @@ TEST(Backward, simple_single_op) {
            std::vector<std::string>({f::GradVarName("b")}));
 }

+// Builds a single "mul" op with no attributes set, runs AppendBackward, and
+// checks that both the forward op and the generated "mul_grad" op report
+// x_num_col_dims == 1 and y_num_col_dims == 1.
+TEST(Backward, default_attribute) {
+  f::ProgramDesc *raw_program = GetNewProgramDesc();
+  f::ProgramDescBind &bound_program = f::ProgramDescBind::Instance(raw_program);
+  f::BlockDescBind *root_block = bound_program.Block(0);
+
+  f::OpDescBind *mul_op = root_block->AppendOp();
+  mul_op->SetType("mul");
+  mul_op->SetInput("X", {"x"});
+  mul_op->SetInput("Y", {"y"});
+  mul_op->SetOutput("Out", {"out"});
+
+  AppendBackward(bound_program, {});
+
+  // Exactly one gradient op must have been appended after the forward op.
+  ASSERT_EQ(root_block->AllOps().size(), 2UL);
+  for (const char *attr_name : {"x_num_col_dims", "y_num_col_dims"}) {
+    EXPECT_EQ(boost::get<int>(mul_op->GetAttr(attr_name)), 1);
+  }
+
+  f::OpDescBind *mul_grad_op = root_block->AllOps()[1];
+  ASSERT_EQ(mul_grad_op->Type(), "mul_grad");
+  for (const char *attr_name : {"x_num_col_dims", "y_num_col_dims"}) {
+    EXPECT_EQ(boost::get<int>(mul_grad_op->GetAttr(attr_name)), 1);
+  }
+}
+
 TEST(Backward, simple_mult_op) {
  f::ProgramDesc *program_desc = GetNewProgramDesc();
  f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
--- a/paddle/framework/block_desc.cc
+++ b/paddle/framework/block_desc.cc
@ -74,6 +74,12 @@ void BlockDescBind::Sync() {
    for (auto &op_desc : ops_) {
      op_field.AddAllocated(op_desc->Proto());
    }
+    auto &var_field = *this->desc_->mutable_vars();
+    var_field.Clear();
+    var_field.Reserve(static_cast<int>(vars_.size()));
+    for (auto &var_desc : vars_) {
+      var_field.AddAllocated(var_desc.second->Proto());
+    }
    need_update_ = false;
  }
 }
--- a/paddle/framework/block_desc.h
+++ b/paddle/framework/block_desc.h
@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once

 #include <deque>
+#include <memory>
 #include <unordered_map>
 #include <vector>
 #include "paddle/framework/op_desc.h"
--- a/paddle/framework/data_type.h
+++ b/paddle/framework/data_type.h
@ -28,7 +28,6 @@ inline DataType ToDataType(std::type_index type) {
    return DataType::INT32;
  } else {
    PADDLE_THROW("Not supported");
-    return static_cast<DataType>(-1);
  }
 }

--- a/paddle/framework/framework.proto
+++ b/paddle/framework/framework.proto
@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 syntax = "proto2";
+option optimize_for = LITE_RUNTIME;
 package paddle.framework;

 enum AttrType {
--- a/paddle/framework/op_desc.cc
+++ b/paddle/framework/op_desc.cc
@ -13,7 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/framework/op_desc.h"
+#include <functional>
+#include <unordered_map>
 #include "paddle/framework/block_desc.h"
+#include "paddle/framework/operator.h"

 namespace paddle {
 namespace framework {
@ -25,6 +28,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs,
  inputs_ = inputs;
  outputs_ = outputs;
  attrs_ = attrs;
+  need_update_ = true;
 }

 OpDesc *OpDescBind::Proto() {
@ -184,5 +188,38 @@ void OpDescBind::Sync() {
    need_update_ = false;
  }
 }
+
+using InferShapeFuncMap =
+    std::unordered_map<std::string /*op_type*/,
+                       std::function<void(InferShapeContext *)>>;
+
+// Returns the op-type -> InferShape callback table, building it on the first
+// call from every op type present in OperatorWithKernel::AllOpKernels().
+// The table and the operator instances captured by its callbacks are
+// allocated once and never released.
+static InferShapeFuncMap &InferShapeFuncs() {
+  static InferShapeFuncMap *g_map = nullptr;
+  if (g_map != nullptr) {
+    return *g_map;
+  }
+
+  g_map = new InferShapeFuncMap();
+  auto &op_infos = OpInfoMap::Instance();
+  // all registered kernels
+  for (auto &kernel_entry : OperatorWithKernel::AllOpKernels()) {
+    const auto &op_type = kernel_entry.first;
+    auto &op_info = op_infos.Get(op_type);
+    // use empty type here to avoid runtime checks.
+    auto *prototype =
+        static_cast<OperatorWithKernel *>(op_info.Creator()("", {}, {}, {}));
+    g_map->emplace(op_type, [prototype](InferShapeContext *ctx) {
+      prototype->InferShape(ctx);
+    });
+  }
+  return *g_map;
+}
+
+// Runs shape inference for this op description against `block`.
+// Looks up the callback for this op's type in InferShapeFuncs() and invokes
+// it with a CompileTimeInferShapeContext built from (*this, block).
+// Throws via PADDLE_THROW when no callback exists for the type.
+void OpDescBind::InferShape(const BlockDescBind &block) const {
+  auto &shape_funcs = InferShapeFuncs();
+  auto found = shape_funcs.find(this->Type());
+  if (found == shape_funcs.end()) {
+    PADDLE_THROW("Operator %s has not been registered", this->Type());
+  }
+  CompileTimeInferShapeContext compile_ctx(*this, block);
+  found->second(&compile_ctx);
+}
+
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/framework/op_desc.h
+++ b/paddle/framework/op_desc.h
@ -52,8 +52,6 @@ class OpDescBind {
  void SetOutput(const std::string &param_name,
                 const std::vector<std::string> &args);

-  std::string DebugString() { return this->Proto()->DebugString(); }
-
  bool HasAttr(const std::string &name) const {
    return attrs_.find(name) != attrs_.end();
  }
@ -97,6 +95,13 @@ class OpDescBind {

  const VariableNameMap &Outputs() const { return outputs_; }

+  // Hands out a pointer to this op's attribute map for in-place editing.
+  // need_update_ is set first because the caller may modify the map through
+  // the returned pointer.
+  AttributeMap *MutableAttrMap() {
+    need_update_ = true;
+    return &attrs_;
+  }
+
+  void InferShape(const BlockDescBind &block) const;
+
 private:
  template <typename MapType>
  static std::vector<typename MapType::key_type> MapKeys(const MapType &map) {
--- a/paddle/framework/op_registry.cc
+++ b/paddle/framework/op_registry.cc
@ -60,9 +60,14 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDescBind& op_desc) {
 }

 std::vector<std::unique_ptr<OpDescBind>> OpRegistry::CreateGradOpDescs(
-    const OpDescBind& op_desc) {
-  auto& info = OpInfoMap::Instance().Get(op_desc.Type());
-  return info.grad_op_maker_(op_desc);
+    OpDescBind* op_desc) {
+  auto& info = OpInfoMap::Instance().Get(op_desc->Type());
+
+  if (info.Checker() != nullptr) {
+    info.Checker()->Check(*op_desc->MutableAttrMap());
+  }
+
+  return info.grad_op_maker_(*op_desc);
 }

 }  // namespace framework
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@ -80,7 +80,7 @@ class OpRegistry {
  static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);

  static std::vector<std::unique_ptr<OpDescBind>> CreateGradOpDescs(
-      const OpDescBind& op_desc);
+      OpDescBind* op_desc);

  static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc);
 };
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@ -205,13 +205,13 @@ void OperatorBase::GenerateTemporaryNames() {
 }

 template <>
-const Tensor* InferShapeContext::Input<Tensor>(const std::string& name) const {
+const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const {
  auto* var = InputVar(name);
  return var == nullptr ? nullptr : GetTensorFromVar(var);
 }

 template <>
-const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
+const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
    const std::string& name) const {
  auto names = op().Inputs(name);
  std::vector<const Tensor*> res;
@ -225,13 +225,13 @@ const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
 }

 template <>
-Tensor* InferShapeContext::Output<Tensor>(const std::string& name) const {
+Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
  auto var = OutputVar(name);
  return var == nullptr ? nullptr : var->GetMutable<LoDTensor>();
 }

 template <>
-std::vector<Tensor*> InferShapeContext::MultiOutput<Tensor>(
+std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
    const std::string& name) const {
  auto names = op().Outputs(name);
  std::vector<Tensor*> res;
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@ -57,7 +57,6 @@ inline std::string GradVarName(const std::string& var_name) {
 }

 class OperatorBase;
-class InferShapeContext;
 class ExecutionContext;

 extern const Tensor* GetTensorFromVar(const Variable* var);
@ -169,10 +168,11 @@ class NOP : public OperatorBase {
  }
 };

-class InferShapeContext {
+class ExecutionContext {
 public:
-  InferShapeContext(const OperatorBase& op, const Scope& scope)
-      : op_(op), scope_(scope) {}
+  ExecutionContext(const OperatorBase& op, const Scope& scope,
+                   const platform::DeviceContext& device_context)
+      : op_(op), scope_(scope), device_context_(device_context) {}

  const OperatorBase& op() const { return op_; }

@ -278,31 +278,6 @@ class InferShapeContext {
    out_tensor->set_lod(in_tensor.lod());
  }

- private:
-  const OperatorBase& op_;
-  const Scope& scope_;
-};
-
-template <>
-const Tensor* InferShapeContext::Input<Tensor>(const std::string& name) const;
-
-template <>
-const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
-    const std::string& name) const;
-
-template <>
-Tensor* InferShapeContext::Output<Tensor>(const std::string& name) const;
-
-template <>
-std::vector<Tensor*> InferShapeContext::MultiOutput<Tensor>(
-    const std::string& name) const;
-
-class ExecutionContext : public InferShapeContext {
- public:
-  ExecutionContext(const OperatorBase& op, const Scope& scope,
-                   const platform::DeviceContext& device_context)
-      : InferShapeContext(op, scope), device_context_(device_context) {}
-
  template <typename PlaceType,
            typename DeviceType = typename platform::EigenDeviceConverter<
                PlaceType>::EigenDeviceType>
@ -315,10 +290,26 @@ class ExecutionContext : public InferShapeContext {
  }

 private:
+  const OperatorBase& op_;
+  const Scope& scope_;
  const platform::DeviceContext& device_context_;
 };

-class CompileTimeInferShapeContext : public InferShapeContextBase {
+template <>
+const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const;
+
+template <>
+const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
+    const std::string& name) const;
+
+template <>
+Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const;
+
+template <>
+std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
+    const std::string& name) const;
+
+class CompileTimeInferShapeContext : public InferShapeContext {
 public:
  CompileTimeInferShapeContext(const OpDescBind& op, const BlockDescBind& block)
      : op_(op), block_(block) {}
@ -414,7 +405,7 @@ class CompileTimeInferShapeContext : public InferShapeContextBase {
  const BlockDescBind& block_;
 };

-class RuntimeInferShapeContext : public InferShapeContextBase {
+class RuntimeInferShapeContext : public InferShapeContext {
 public:
  RuntimeInferShapeContext(const OperatorBase& op, const Scope& scope)
      : op_(op), scope_(scope) {}
@ -612,7 +603,7 @@ class OperatorWithKernel : public OperatorBase {
                       });
  }

-  virtual void InferShape(InferShapeContextBase* ctx) const = 0;
+  virtual void InferShape(InferShapeContext* ctx) const = 0;

 protected:
  // indicate kernel DataType by input data. Defaultly all input data must be
--- a/paddle/framework/operator_test.cc
+++ b/paddle/framework/operator_test.cc
@ -113,7 +113,7 @@ class OpWithKernelTest : public OperatorWithKernel {
  using OperatorWithKernel::OperatorWithKernel;

 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {}
+  void InferShape(framework::InferShapeContext* ctx) const override {}
  DataType IndicateDataType(const ExecutionContext& ctx) const override {
    return DataType::FP32;
  }
--- a/paddle/framework/program_desc.h
+++ b/paddle/framework/program_desc.h
@ -14,6 +14,7 @@ limitations under the License. */

 #pragma once

+#include <memory>
 #include <vector>
 #include "paddle/framework/framework.pb.h"
 #include "paddle/platform/macros.h"
@ -31,8 +32,6 @@ class ProgramDescBind {

  BlockDescBind *Block(size_t idx) { return blocks_[idx].get(); }

-  std::string DebugString() { return Proto()->DebugString(); }
-
  size_t Size() const { return blocks_.size(); }

  ProgramDesc *Proto();
--- a/paddle/framework/shape_inference.h
+++ b/paddle/framework/shape_inference.h
@ -20,11 +20,11 @@ namespace paddle {
 namespace framework {

 // TODO(longfei): Once after both CompileTimeInferShapeContext and
-// RuntimeInferShapeContext get merged, we can rename InferShapeContextBase into
+// RuntimeInferShapeContext get merged, we can rename InferShapeContext into
 // InferShapeContext so to replace the current InferShapeContext.
-class InferShapeContextBase {
+class InferShapeContext {
 public:
-  virtual ~InferShapeContextBase() {}
+  virtual ~InferShapeContext() {}
  virtual bool HasInput(const std::string &name) const = 0;
  virtual bool HasOutput(const std::string &name) const = 0;

--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@ -95,6 +95,19 @@ class Tensor {
  template <typename T>
  inline void CopyFrom(const Tensor& src, const platform::Place& dst_place);

+  /**
+   * @brief   Copy the content of an external vector to a tensor.
+   *
+   * @param[in] src         The external vector.
+   * @param[in] dst_place   The place (CPU or GPU) where the data is stored.
+   *
+   * @note    CopyFromVector assumes that the tensor has been resized
+   *          before invoking.
+   */
+  template <typename T>
+  inline void CopyFromVector(const std::vector<T>& src,
+                             const platform::Place& dst_place);
+
  /**
   * @brief   Return the slice of the tensor.
   *
--- a/paddle/framework/tensor_impl.h
+++ b/paddle/framework/tensor_impl.h
@ -123,6 +123,29 @@ inline void Tensor::CopyFrom(const Tensor& src,
 #endif
 }

+// Copies the elements of `src` (host memory) into this tensor's buffer at
+// `dst_place`. The buffer is obtained with mutable_data<T>(dst_place), and
+// src.size() * sizeof(T) bytes are copied.
+template <typename T>
+inline void Tensor::CopyFromVector(const std::vector<T>& src,
+                                   const platform::Place& dst_place) {
+  auto src_ptr = static_cast<const void*>(src.data());
+  platform::CPUPlace src_place;
+  auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
+  auto size = src.size() * sizeof(T);
+
+  if (platform::is_cpu_place(dst_place)) {
+    memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr, src_place,
+                 src_ptr, size);
+  }
+#ifdef PADDLE_WITH_CUDA
+  else if (platform::is_gpu_place(dst_place)) {
+    memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr, src_place,
+                 src_ptr, size, 0);
+    // Synchronize stream 0 only after a GPU copy; a CPU-to-CPU copy issues no
+    // CUDA work, so it must not depend on a CUDA call succeeding.
+    PADDLE_ENFORCE(cudaStreamSynchronize(0),
+                   "cudaStreamSynchronize failed in Tensor CopyFromVector");
+  }
+#endif
+}
+
 template <typename T>
 inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
  check_memory_size<T>();
--- a/paddle/framework/tensor_test.cc
+++ b/paddle/framework/tensor_test.cc
@ -263,6 +263,93 @@ TEST(Tensor, CopyFrom) {
 #endif
 }

+// Exercises Tensor::CopyFromVector on a CPU tensor and, in CUDA builds, on a
+// GPU tensor that is copied back to the CPU for comparison.
+TEST(Tensor, CopyFromVector) {
+  using namespace paddle::framework;
+  using namespace paddle::platform;
+  {
+    std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    Tensor cpu_tensor;
+
+    // Copy to CPU Tensor
+    cpu_tensor.Resize(make_ddim({3, 3}));
+    // Stack-allocated place: nothing to leak if an ASSERT_* returns early.
+    paddle::platform::CPUPlace cpu_place;
+    cpu_tensor.CopyFromVector<int>(src_vec, cpu_place);
+
+    // Compare Tensors
+    const int* cpu_ptr = cpu_tensor.data<int>();
+    const int* src_ptr = src_vec.data();
+    ASSERT_NE(src_ptr, cpu_ptr);
+    for (size_t i = 0; i < src_vec.size(); ++i) {
+      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
+    }
+
+    // Erasing five of the nine elements leaves four, matching the 2x2 tensor.
+    src_vec.erase(src_vec.begin(), src_vec.begin() + 5);
+    cpu_tensor.Resize(make_ddim({2, 2}));
+    cpu_tensor.CopyFromVector<int>(src_vec, cpu_place);
+    cpu_ptr = cpu_tensor.data<int>();
+    src_ptr = src_vec.data();
+    ASSERT_NE(src_ptr, cpu_ptr);
+    // Bound the loop by the vector size; a fixed count of 5 would read past
+    // the end of both the four-element vector and the 2x2 tensor.
+    for (size_t i = 0; i < src_vec.size(); ++i) {
+      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
+    }
+  }
+
+#ifdef PADDLE_WITH_CUDA
+  {
+    std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    Tensor cpu_tensor;
+    Tensor gpu_tensor;
+    Tensor dst_tensor;
+
+    // Copy to CPU Tensor
+    cpu_tensor.Resize(make_ddim({3, 3}));
+    paddle::platform::CPUPlace cpu_place;
+    cpu_tensor.CopyFromVector<int>(src_vec, cpu_place);
+
+    // Copy to GPUTensor
+    gpu_tensor.Resize(make_ddim({3, 3}));
+    paddle::platform::GPUPlace gpu_place;
+    gpu_tensor.CopyFromVector<int>(src_vec, gpu_place);
+    // Copy from GPU to CPU tensor for comparison
+    dst_tensor.CopyFrom<int>(gpu_tensor, cpu_place);
+
+    // Compare Tensors
+    const int* src_ptr = src_vec.data();
+    const int* cpu_ptr = cpu_tensor.data<int>();
+    const int* dst_ptr = dst_tensor.data<int>();
+    ASSERT_NE(src_ptr, cpu_ptr);
+    ASSERT_NE(src_ptr, dst_ptr);
+    for (size_t i = 0; i < src_vec.size(); ++i) {
+      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
+      EXPECT_EQ(src_ptr[i], dst_ptr[i]);
+    }
+
+    // Erasing five of the nine elements leaves four, matching the 2x2 tensors.
+    src_vec.erase(src_vec.begin(), src_vec.begin() + 5);
+
+    cpu_tensor.Resize(make_ddim({2, 2}));
+    cpu_tensor.CopyFromVector<int>(src_vec, cpu_place);
+    gpu_tensor.Resize(make_ddim({2, 2}));
+    gpu_tensor.CopyFromVector<int>(src_vec, gpu_place);
+    dst_tensor.CopyFrom<int>(gpu_tensor, cpu_place);
+
+    src_ptr = src_vec.data();
+    cpu_ptr = cpu_tensor.data<int>();
+    dst_ptr = dst_tensor.data<int>();
+    ASSERT_NE(src_ptr, cpu_ptr);
+    ASSERT_NE(src_ptr, dst_ptr);
+    // Same bound as the CPU-only section: only four elements exist here.
+    for (size_t i = 0; i < src_vec.size(); ++i) {
+      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
+      EXPECT_EQ(src_ptr[i], dst_ptr[i]);
+    }
+  }
+#endif
+}
+
 TEST(Tensor, ReshapeToMatrix) {
  using namespace paddle::framework;
  using namespace paddle::platform;
--- a/paddle/framework/type_defs.h
+++ b/paddle/framework/type_defs.h
@ -15,6 +15,7 @@
 #pragma once
 #include <functional>
 #include <map>
+#include <memory>
 #include "paddle/platform/variant.h"

 namespace paddle {
--- a/paddle/math/tests/test_GpuProfiler.cpp
+++ b/paddle/math/tests/test_GpuProfiler.cpp
@ -162,4 +162,4 @@ int main(int argc, char** argv) {
  return RUN_ALL_TESTS();
 }

-#endif /* PADDLE_ONLY_CPU */
+#endif
--- a/paddle/memory/detail/buddy_allocator.cc
+++ b/paddle/memory/detail/buddy_allocator.cc
@ -182,7 +182,7 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
      max_chunk_size_ = platform::GpuMaxChunkSize();
    }
  }
-#endif  // PADDLE_ONLY_CPU
+#endif

  // Allocate a new maximum sized block
  size_t index = 0;
--- a/paddle/memory/detail/system_allocator.cc
+++ b/paddle/memory/detail/system_allocator.cc
@ -134,7 +134,7 @@ void GPUAllocator::Free(void* p, size_t size, size_t index) {

 bool GPUAllocator::UseGpu() const { return true; }

-#endif  // PADDLE_ONLY_CPU
+#endif

 }  // namespace detail
 }  // namespace memory
--- a/paddle/memory/detail/system_allocator.h
+++ b/paddle/memory/detail/system_allocator.h
@ -51,7 +51,7 @@ class GPUAllocator : public SystemAllocator {
  size_t gpu_alloc_size_ = 0;
  size_t fallback_alloc_size_ = 0;
 };
-#endif  // PADDLE_ONLY_CPU
+#endif

 }  // namespace detail
 }  // namespace memory
--- a/paddle/memory/detail/system_allocator_test.cc
+++ b/paddle/memory/detail/system_allocator_test.cc
@ -62,4 +62,4 @@ TEST(GPUAllocator, Alloc) {
  TestAllocator(a, 2048);
  TestAllocator(a, 0);
 }
-#endif  // PADDLE_ONLY_CPU
+#endif
--- a/Show More
+++ b/Show More