From 1a968b4f64567d1281dd278a6b412cd823663e43 Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 10 Jul 2017 20:39:48 +0800 Subject: [PATCH 01/50] init --- paddle/framework/ddim.h | 10 ++++ paddle/framework/tensor.h | 27 ++++++++-- paddle/framework/tensor_types.h | 91 +++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 paddle/framework/tensor_types.h diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index 223c4180be..053a09d63a 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -6,6 +6,7 @@ #include #include "paddle/framework/dim.h" +#include "unsupported/Eigen/CXX11/Tensor" namespace paddle { namespace framework { @@ -91,6 +92,15 @@ int arity(const DDim& ddim); std::ostream& operator<<(std::ostream&, const DDim&); +template +Eigen::DSizes ToEigenDSizes(DDim dims) const { + Eigen::DSizes dsizes; + for (int d = 0; d < paddle::framework::arity(dims); d++) { + dsizes[d] = dims[d]; + } + return dsizes; +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index ce5d98b04e..81af430611 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -18,8 +18,10 @@ limitations under the License. */ #include #include "paddle/framework/ddim.h" #include "paddle/framework/enforce.h" +#include "paddle/framework/tensor_types.h" #include "paddle/memory/memory.h" #include "paddle/platform/place.h" +#include "unsupported/Eigen/CXX11/Tensor" namespace paddle { namespace framework { @@ -33,6 +35,13 @@ class Tensor { return static_cast(holder_->Ptr()); } + template + T* data() const { + PADDLE_ENFORCE(holder_ != nullptr, + "Tensor::data must be called after Tensor::mutable_data."); + return static_cast(holder_->Ptr()); + } + template ::value>::type* = nullptr> T* mutable_data(DDim dims, paddle::platform::Place place) { @@ -41,14 +50,23 @@ class Tensor { place) /* some versions of boost::variant don't have operator!= */ || holder_->Size() < product(dims) * sizeof(T)) { holder_.reset(new PlaceholderImpl(place, product(dims) * sizeof(T))); + dims_ = dims; } return static_cast(holder_->Ptr()); } - template ::value>::type* = nullptr> - T* mutable_data(DDim dims) { - return mutable_data(dims, paddle::platform::get_place()); + DDim dim() const { return dims_; } + + template + typename TTypes::ConstantTensor Tensor::tensor() { + return typename TTypes::Tensor( + data(), paddle::framework::ToEigenDSizes(dims_)); + } + + template + typename TTypes::Tensor Tensor::tensor() { + return typename TTypes::Tensor( + data(), paddle::framework::ToEigenDSizes(dims_)); } private: @@ -92,6 +110,7 @@ class Tensor { }; std::shared_ptr holder_; // holds the memory block if allocated. + DDim dims_; }; } // namespace framework diff --git a/paddle/framework/tensor_types.h b/paddle/framework/tensor_types.h new file mode 100644 index 0000000000..b68697108c --- /dev/null +++ b/paddle/framework/tensor_types.h @@ -0,0 +1,91 @@ +#pragma once + +#include "unsupported/Eigen/CXX11/Tensor" + +namespace paddle { +namespace framework { + +// Helper to define Tensor types given that the scalar is of type T. +template +struct TTypes { + // Rank- tensor of scalar type T. + typedef Eigen::TensorMap, + Eigen::Aligned> + Tensor; + typedef Eigen::TensorMap< + Eigen::Tensor, Eigen::Aligned> + ConstTensor; + + // Unaligned Rank- tensor of scalar type T. 
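+  // For example, these aliases provide non-owning Eigen views over raw
+  // buffers (an illustrative sketch; `buf` is an assumed, already-allocated
+  // and suitably aligned array of 24 floats):
+  //   TTypes<float, 2>::Tensor mat(buf, 4, 6);  // 4x6 row-major matrix view
+  //   mat.setConstant(1.0f);                    // Eigen expressions apply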
+ typedef Eigen::TensorMap> + UnalignedTensor; + typedef Eigen::TensorMap< + Eigen::Tensor> + UnalignedConstTensor; + + typedef Eigen::TensorMap, + Eigen::Aligned> + Tensor32Bit; + + // Scalar tensor (implemented as a rank-0 tensor) of scalar type T. + typedef Eigen::TensorMap< + Eigen::TensorFixedSize, Eigen::RowMajor, IndexType>, + Eigen::Aligned> + Scalar; + typedef Eigen::TensorMap, + Eigen::RowMajor, IndexType>, + Eigen::Aligned> + ConstScalar; + + // Unaligned Scalar tensor of scalar type T. + typedef Eigen::TensorMap< + Eigen::TensorFixedSize, Eigen::RowMajor, IndexType>> + UnalignedScalar; + typedef Eigen::TensorMap, + Eigen::RowMajor, IndexType>> + UnalignedConstScalar; + + // Rank-1 tensor (vector) of scalar type T. + typedef Eigen::TensorMap, + Eigen::Aligned> + Flat; + typedef Eigen::TensorMap< + Eigen::Tensor, Eigen::Aligned> + ConstFlat; + typedef Eigen::TensorMap, + Eigen::Aligned> + Vec; + typedef Eigen::TensorMap< + Eigen::Tensor, Eigen::Aligned> + ConstVec; + + // Unaligned Rank-1 tensor (vector) of scalar type T. + typedef Eigen::TensorMap> + UnalignedFlat; + typedef Eigen::TensorMap< + Eigen::Tensor> + UnalignedConstFlat; + typedef Eigen::TensorMap> + UnalignedVec; + typedef Eigen::TensorMap< + Eigen::Tensor> + UnalignedConstVec; + + // Rank-2 tensor (matrix) of scalar type T. + typedef Eigen::TensorMap, + Eigen::Aligned> + Matrix; + typedef Eigen::TensorMap< + Eigen::Tensor, Eigen::Aligned> + ConstMatrix; + + // Unaligned Rank-2 tensor (matrix) of scalar type T. + typedef Eigen::TensorMap> + UnalignedMatrix; + typedef Eigen::TensorMap< + Eigen::Tensor> + UnalignedConstMatrix; +}; + +} // namespace framework +} // namespace paddle From d6f7c3535d0907af4e2d955451e9a872d6b857a3 Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 11 Jul 2017 12:52:07 +0800 Subject: [PATCH 02/50] move unaligned tensor types --- paddle/framework/tensor_types.h | 38 --------------------------------- 1 file changed, 38 deletions(-) diff --git a/paddle/framework/tensor_types.h b/paddle/framework/tensor_types.h index b68697108c..26de25b7c2 100644 --- a/paddle/framework/tensor_types.h +++ b/paddle/framework/tensor_types.h @@ -16,17 +16,6 @@ struct TTypes { Eigen::Tensor, Eigen::Aligned> ConstTensor; - // Unaligned Rank- tensor of scalar type T. - typedef Eigen::TensorMap> - UnalignedTensor; - typedef Eigen::TensorMap< - Eigen::Tensor> - UnalignedConstTensor; - - typedef Eigen::TensorMap, - Eigen::Aligned> - Tensor32Bit; - // Scalar tensor (implemented as a rank-0 tensor) of scalar type T. typedef Eigen::TensorMap< Eigen::TensorFixedSize, Eigen::RowMajor, IndexType>, @@ -37,14 +26,6 @@ struct TTypes { Eigen::Aligned> ConstScalar; - // Unaligned Scalar tensor of scalar type T. - typedef Eigen::TensorMap< - Eigen::TensorFixedSize, Eigen::RowMajor, IndexType>> - UnalignedScalar; - typedef Eigen::TensorMap, - Eigen::RowMajor, IndexType>> - UnalignedConstScalar; - // Rank-1 tensor (vector) of scalar type T. typedef Eigen::TensorMap, Eigen::Aligned> @@ -59,18 +40,6 @@ struct TTypes { Eigen::Tensor, Eigen::Aligned> ConstVec; - // Unaligned Rank-1 tensor (vector) of scalar type T. - typedef Eigen::TensorMap> - UnalignedFlat; - typedef Eigen::TensorMap< - Eigen::Tensor> - UnalignedConstFlat; - typedef Eigen::TensorMap> - UnalignedVec; - typedef Eigen::TensorMap< - Eigen::Tensor> - UnalignedConstVec; - // Rank-2 tensor (matrix) of scalar type T. 
typedef Eigen::TensorMap, Eigen::Aligned> @@ -78,13 +47,6 @@ struct TTypes { typedef Eigen::TensorMap< Eigen::Tensor, Eigen::Aligned> ConstMatrix; - - // Unaligned Rank-2 tensor (matrix) of scalar type T. - typedef Eigen::TensorMap> - UnalignedMatrix; - typedef Eigen::TensorMap< - Eigen::Tensor> - UnalignedConstMatrix; }; } // namespace framework From 958511160bc42fee48c9ad775dfb08e5198bf3e9 Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 11 Jul 2017 13:40:44 +0800 Subject: [PATCH 03/50] add simple add_op_functor --- paddle/framework/ddim.cc | 12 ++++++++ paddle/framework/ddim.h | 8 +----- paddle/framework/tensor.h | 47 +++++++++++++++++++++++++++++-- paddle/framework/tensor_types.h | 14 +++++++++ paddle/operators/add_op_functor.h | 35 +++++++++++++++++++++++ 5 files changed, 107 insertions(+), 9 deletions(-) create mode 100644 paddle/operators/add_op_functor.h diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 3f949a6595..9431645cf5 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -1,4 +1,5 @@ #include "paddle/framework/ddim.h" +#include "paddle/framework/enforce.h" namespace paddle { namespace framework { @@ -220,5 +221,16 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) { return os; } +template +Eigen::DSizes ToEigenDSizes(DDim dims) const { + int rank = paddle::framework::arity(dims); + PADDLE_ENFORCE(rank == NDIMS, "DDim and NDIMS must be same") + Eigen::DSizes dsizes; + for (int d = 0; d < paddle::framework::arity(dims); d++) { + dsizes[d] = dims[d]; + } + return dsizes; +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index 053a09d63a..a83a367196 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -93,13 +93,7 @@ int arity(const DDim& ddim); std::ostream& operator<<(std::ostream&, const DDim&); template -Eigen::DSizes ToEigenDSizes(DDim dims) const { - Eigen::DSizes dsizes; - for (int d = 0; d < paddle::framework::arity(dims); d++) { - dsizes[d] = dims[d]; - } - return dsizes; -} +Eigen::DSizes ToEigenDSizes(DDim dims) const; } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 81af430611..0fa74e7ab1 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -57,18 +57,61 @@ class Tensor { DDim dim() const { return dims_; } + size_t NumElements() const { return product(dims_); } + + template + typename TTypes::Tensor Tensor::shaped(DDim new_dims) { + Eigen::array dims = + paddle::framework::ToEigenDSizes(new_dims); + return typename TTypes::Tensor(data(), dims); + } + template - typename TTypes::ConstantTensor Tensor::tensor() { + typename TTypes::Tensor Tensor::tensor() { return typename TTypes::Tensor( data(), paddle::framework::ToEigenDSizes(dims_)); } + // flat to rank = 1 + template + typename TTypes::Flat flat() { + return shaped({NumElements()}); + } + + // to TensorType Vec + template + typename TTypes::Vec vec() { + return tensor(); + } + + // to TensorType Matrix + template + typename TTypes::Matrix matrix() { + return tensor(); + } + + // const versions of all the methods above. 
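+  // For example (an illustrative sketch; assumes a float tensor allocated
+  // through mutable_data on the CPU):
+  //   Tensor t;
+  //   t.mutable_data<float>(make_ddim({2, 3}), paddle::platform::CPUPlace());
+  //   auto mat = t.matrix<float>();  // rank-2 Eigen view of shape (2, 3)
+  //   auto vec = t.flat<float>();    // rank-1 Eigen view over all 6 elements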
template - typename TTypes::Tensor Tensor::tensor() { + typename TTypes::ConstantTensor Tensor::tensor() const { return typename TTypes::Tensor( data(), paddle::framework::ToEigenDSizes(dims_)); } + template + typename TTypes::ConstFlat flat() const { + return shaped({NumElements()}); + } + + template + typename TTypes::ConstVec vec() const { + return tensor(); + } + + template + typename TTypes::ConstMatrix matrix() const { + return tensor(); + } + private: // Placeholder hides type T, so it doesn't appear as a template // parameter of Variable. diff --git a/paddle/framework/tensor_types.h b/paddle/framework/tensor_types.h index 26de25b7c2..4bf27a377e 100644 --- a/paddle/framework/tensor_types.h +++ b/paddle/framework/tensor_types.h @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #pragma once #include "unsupported/Eigen/CXX11/Tensor" diff --git a/paddle/operators/add_op_functor.h b/paddle/operators/add_op_functor.h new file mode 100644 index 0000000000..904f24b030 --- /dev/null +++ b/paddle/operators/add_op_functor.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/framework/tensor_types.h" +#include "unsupported/Eigen/CXX11/Tensor" + +namespace paddle { +namespace operators { +namespace functor { + +template +struct Add { + void Operator()(const Device& d, + typename TTypes::ConstTensor input1, + typename TTypes::ConstTensor input2, + typename TTypes::Tensor output) { + output.device(d) = input1 + input2; + } +}; +} // namespace functor +} // namespace operators +} // namespace paddle From d607f0b70308c61e5399773a475b8e8c640e63c1 Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 11 Jul 2017 14:15:45 +0800 Subject: [PATCH 04/50] use cached rank --- paddle/framework/ddim.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 9431645cf5..3fd3e538e8 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -226,7 +226,7 @@ Eigen::DSizes ToEigenDSizes(DDim dims) const { int rank = paddle::framework::arity(dims); PADDLE_ENFORCE(rank == NDIMS, "DDim and NDIMS must be same") Eigen::DSizes dsizes; - for (int d = 0; d < paddle::framework::arity(dims); d++) { + for (int d = 0; d < rank; d++) { dsizes[d] = dims[d]; } return dsizes; From a5eb1d8fabe15b1e59e09fb0ed18de76de8843be Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 11 Jul 2017 16:21:19 +0800 Subject: [PATCH 05/50] fix build error --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/ddim.cc | 6 +++--- paddle/framework/ddim.h | 2 +- paddle/framework/tensor.h | 15 ++++----------- 4 files changed, 9 insertions(+), 16 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 4409c6feae..2d26a62d0f 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -1,5 +1,5 @@ # ddim lib -cc_library(ddim SRCS ddim.cc) +cc_library(ddim SRCS ddim.cc DEPS eigen3) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) cc_test(tensor_test SRCS tensor_test.cc DEPS ddim) diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 3fd3e538e8..fe8f79abd4 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -222,9 +222,9 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) { } template -Eigen::DSizes ToEigenDSizes(DDim dims) const { - int rank = paddle::framework::arity(dims); - PADDLE_ENFORCE(rank == NDIMS, "DDim and NDIMS must be same") +Eigen::DSizes ToEigenDSizes(const DDim& dims) { + int rank = arity(dims); + PADDLE_ENFORCE(rank == NDIMS, "DDim and NDIMS must be same"); Eigen::DSizes dsizes; for (int d = 0; d < rank; d++) { dsizes[d] = dims[d]; diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index a83a367196..18395c3636 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -93,7 +93,7 @@ int arity(const DDim& ddim); std::ostream& operator<<(std::ostream&, const DDim&); template -Eigen::DSizes ToEigenDSizes(DDim dims) const; +Eigen::DSizes ToEigenDSizes(const DDim& dims); } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 0fa74e7ab1..21818937e8 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -28,13 +28,6 @@ namespace framework { class Tensor { public: - template - const T* data() const { - PADDLE_ENFORCE(holder_ != nullptr, - "Tensor::data must be called after Tensor::mutable_data."); - return static_cast(holder_->Ptr()); - } - template T* data() const { PADDLE_ENFORCE(holder_ != nullptr, @@ -60,14 +53,14 @@ class Tensor { 
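 // The methods in this hunk bridge DDim shapes to Eigen through the
 // ToEigenDSizes helper fixed above (an illustrative sketch of that call;
 // NDIMS must equal the DDim's arity, which its PADDLE_ENFORCE checks):
 //   DDim d = make_ddim({3, 4});
 //   Eigen::DSizes<Eigen::DenseIndex, 2> sizes = ToEigenDSizes<2>(d);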
size_t NumElements() const { return product(dims_); } template - typename TTypes::Tensor Tensor::shaped(DDim new_dims) { + typename TTypes::Tensor shaped(DDim new_dims) { Eigen::array dims = - paddle::framework::ToEigenDSizes(new_dims); + paddle::framework::ToEigenDSizes(new_dims); return typename TTypes::Tensor(data(), dims); } template - typename TTypes::Tensor Tensor::tensor() { + typename TTypes::Tensor tensor() { return typename TTypes::Tensor( data(), paddle::framework::ToEigenDSizes(dims_)); } @@ -92,7 +85,7 @@ class Tensor { // const versions of all the methods above. template - typename TTypes::ConstantTensor Tensor::tensor() const { + typename TTypes::ConstantTensor tensor() const { return typename TTypes::Tensor( data(), paddle::framework::ToEigenDSizes(dims_)); } From 06748210d4771b37bd964e25513102cd2e0fccbf Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 12 Jul 2017 18:05:41 +0800 Subject: [PATCH 06/50] Fix some link errors about NNPACK. --- CMakeLists.txt | 3 ++- .../nnpack => cmake/external}/nnpack.cmake | 14 +++++++++++ paddle/function/CMakeLists.txt | 1 - paddle/function/nnpack/NNPACKConvOp.cpp | 23 +++++++++++-------- 4 files changed, 29 insertions(+), 12 deletions(-) rename {paddle/function/nnpack => cmake/external}/nnpack.cmake (54%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c713db3e3..af58957ea8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,7 +135,8 @@ if(WITH_GPU) endif(WITH_GPU) if(USE_NNPACK) - list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt") + include(external/nnpack) + list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS}) endif(USE_NNPACK) add_subdirectory(proto) diff --git a/paddle/function/nnpack/nnpack.cmake b/cmake/external/nnpack.cmake similarity index 54% rename from paddle/function/nnpack/nnpack.cmake rename to cmake/external/nnpack.cmake index 7182730ae8..d42bcb0f32 100644 --- a/paddle/function/nnpack/nnpack.cmake +++ b/cmake/external/nnpack.cmake @@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK") find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include) find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib) find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib) +find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib) +find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib) if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB) set(NNPACK_FOUND ON) INCLUDE_DIRECTORIES(${NNPACK_INC_DIR}) + + set(NNPACK_LIBS) + list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB}) + if (NNPACK_UKERNELS_LIB) + list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB}) + endif() + if (NNPACK_CPUFEATURES_LIB) + list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB}) + endif() + if(NOT ANDROID) + list(APPEND NNPACK_LIBS "rt") + endif() else() message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})") endif() diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 1518a8a654..a5b14c0c71 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -11,7 +11,6 @@ if(WITH_GPU) endif() if(USE_NNPACK) - include(nnpack/nnpack.cmake) list(APPEND cpp_files nnpack/NNPACKConvOp.cpp) if(WITH_TESTING) add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp) diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp index e8080c3d71..e83bca5d9f 100644 --- a/paddle/function/nnpack/NNPACKConvOp.cpp +++ b/paddle/function/nnpack/NNPACKConvOp.cpp @@ -58,18 +58,10 @@ public: 
workspaceBuffer_ = nullptr; workspaceSize_ = 0; - threadpool_ = nullptr; - if (FLAGS_nnpack_num_threads) { - threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads); - VLOG(3) << "Number of threads " - << pthreadpool_get_threads_count(threadpool_); - } + create_nnpack_threadpool(); } ~NNPACKConvFunction() { - if (threadpool_) { - pthreadpool_destroy(threadpool_); - } if (workspaceBuffer_) { free(workspaceBuffer_); } @@ -225,14 +217,25 @@ public: } } + static void create_nnpack_threadpool() { + if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) { + threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads); + VLOG(3) << "Number of threads " + << pthreadpool_get_threads_count(threadpool_); + } + } + private: nnp_convolution_algorithm algorithm_; nnp_convolution_transform_strategy transform_strategy_; void* workspaceBuffer_; size_t workspaceSize_; - pthreadpool_t threadpool_; + static pthreadpool_t threadpool_; }; +template +pthreadpool_t NNPACKConvFunction::threadpool_ = nullptr; + REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction); } // namespace paddle From 891e5dcc48590375d37364634838b6da260fd41e Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 12 Jul 2017 20:13:07 +0800 Subject: [PATCH 07/50] Modify the default value of nnpack_allocate_outside. --- paddle/function/nnpack/NNPACKConvOp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp index e83bca5d9f..f0ec77a5d0 100644 --- a/paddle/function/nnpack/NNPACKConvOp.cpp +++ b/paddle/function/nnpack/NNPACKConvOp.cpp @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/function/ConvOp.h" DEFINE_bool(nnpack_allocate_outside, - false, + true, "Allocate and free workspace memory outside the NNPACK interface."); DEFINE_int32(nnpack_num_threads, 0, From bac1426d47727a9ea101dd42135a0800c2c5e023 Mon Sep 17 00:00:00 2001 From: qijun Date: Fri, 14 Jul 2017 16:57:03 +0800 Subject: [PATCH 08/50] add_op kernel implementation --- paddle/framework/operator.cc | 12 +++++++ paddle/framework/operator.h | 67 +++++++++++++++++++++++------------- paddle/framework/tensor.h | 16 ++++++++- paddle/operators/add_op.cc | 11 +++--- paddle/operators/add_op.cu | 8 +++-- paddle/operators/add_op.h | 21 +++++++---- 6 files changed, 97 insertions(+), 38 deletions(-) diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 8f7adff8b3..25d120c9a9 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -17,6 +17,18 @@ limitations under the License. */ namespace paddle { namespace framework { +template <> +DeviceType* KernelContext::get_eigen_device() { + return device_context_.get_eigen_device(); +} + +#ifndef PADDLE_ONLY_CPU +template <> +DeviceType* KernelContext::get_eigen_device() { + return device_context_.get_eigen_device(); +} +#endif + std::string OperatorBase::DebugString() const { std::stringstream ss; ss << "=================\n"; diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index d3c55e0ceb..48cfeeb731 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -29,6 +29,21 @@ limitations under the License. 
*/ namespace paddle { namespace framework { +template +struct EigenDeviceConverter; + +template <> +struct EigenDeviceConverter { + using EigenDeviceType = Eigen::DefaultDevice; +}; + +#ifndef PADDLE_ONLY_CPU +template <> +struct EigenDeviceConverter { + using EigenDeviceType = Eigen::GpuDevice; +}; +#endif + class OperatorBase; /** @@ -72,33 +87,39 @@ class OperatorBase { AttributeMap attrs_; }; -class OpKernel { +/** + * KernelContext is the only parameter of Kernel Run function. + * Run will get input/output variables, state such as momentum and + * device resource such as CUDA stream, cublas handle, etc. from + * KernelContext. User should construct it before run the Operator. + */ +class KernelContext { public: - /** - * KernelContext is the only parameter of Kernel Run function. - * Run will get input/output variables, state such as momentum and - * device resource such as CUDA stream, cublas handle, etc. from - * KernelContext. User should construct it before run the Operator. - */ - class KernelContext { - public: - KernelContext(const OperatorBase* op, const std::shared_ptr& scope, - const platform::DeviceContext& device_context) - : op_(*op), scope_(scope), device_context_(device_context) {} - - const Variable* Input(int index) const { - return scope_->GetVariable(op_.inputs_[index]); - } + KernelContext(const OperatorBase* op, const std::shared_ptr& scope, + const platform::DeviceContext& device_context) + : op_(*op), scope_(scope), device_context_(device_context) {} - Variable* Output(int index) const { - return scope_->GetVariable(op_.outputs_[index]); - } + const Variable* Input(int index) const { + return scope_->GetVariable(op_.inputs_[index]); + } - const OperatorBase& op_; - const std::shared_ptr& scope_; - const platform::DeviceContext& device_context_; - }; + Variable* Output(int index) const { + return scope_->GetVariable(op_.outputs_[index]); + } + + platform::DeviceContext& device_context() const { return device_context_; } + template ::EigenDeviceType> + DeviceType* get_eigen_device(); + + const OperatorBase& op_; + const std::shared_ptr& scope_; + const platform::DeviceContext& device_context_; +}; + +class OpKernel { + public: virtual void Compute(const KernelContext& context) const = 0; virtual ~OpKernel() {} diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index e14b75d0e0..01244f617c 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -35,7 +35,7 @@ class Tensor { template - T* data() const { + const T* data() const { PADDLE_ENFORCE( holder_ != nullptr, "Tenosr has not been initialized. 
Call Tensor::mutable_data first."); @@ -58,6 +58,20 @@ class Tensor { offset_); } + template ::value>::type* = nullptr> + T* mutable_data(paddle::platform::Place place) { + if (holder_ == nullptr || + !(holder_->Place() == + place) /* some versions of boost::variant don't have operator!= */ + || holder_->Size() < product(dims_) * sizeof(T) + offset_) { + holder_.reset(new PlaceholderImpl(place, product(dims_) * sizeof(T))); + offset_ = 0; + } + return reinterpret_cast(reinterpret_cast(holder_->Ptr()) + + offset_); + } + size_t NumElements() const { return product(dims_); } template diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index 2766f0bf25..ef39e426fd 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -1,6 +1,6 @@ -#include -#include -#include +#include "paddle/operators/add_op.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/tensor.h" namespace paddle { namespace operators { @@ -36,9 +36,10 @@ The equation is: Out = X + Y )DOC"); } }; -} // namespace op +} // namespace operators } // namespace paddle REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker); REGISTER_OP_CPU_KERNEL( - add_two, ::paddle::operators::AddKernel<::paddle::platform::CPUPlace>); \ No newline at end of file + add_two, + ::paddle::operators::AddKernel<::paddle::platform::CPUPlace, float>); \ No newline at end of file diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index 5979345fff..f4a4fb16a6 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -1,5 +1,7 @@ -#include -#include +#define EIGEN_USE_GPU + +#include "paddle/operators/add_op.h" +#include "paddle/framework/op_registry.h" REGISTER_OP_GPU_KERNEL(add_two, - paddle::operators::AddKernel); \ No newline at end of file + paddle::operators::AddKernel); \ No newline at end of file diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index 17d459dbc8..27a477a3ac 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -1,17 +1,26 @@ #pragma once -#include -#include +#include "glog/logging.h" +#include "paddle/framework/operator.h" +//#include "paddle/operators/add_op_functor.h" namespace paddle { namespace operators { -template +// Place can be CPUPlace or GPUPlace +template class AddKernel : public framework::OpKernel { public: - void Compute(const KernelContext &context) const override { - LOG(INFO) << "Add kernel in " << typeid(Place).name(); + void Compute(const KernelContext& context) const override { + auto* input0 = context.Input(0); + auto* input1 = context.Input(1); + + auto* output = context.Output(0); + output->mutable_data(Place()); + + output->flat().device(*(context.get_eigen_device())) = + input0->flat() + input1->flat(); } }; -} // namespace op +} // namespace operators } // namespace paddle From 450cf18b531f104387fad516f8879590ec75dd16 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Fri, 14 Jul 2017 09:58:38 +0000 Subject: [PATCH 09/50] Add Go compiler to Dockfile.android and rename the build directory to build-android. The newest developing image was push to dockerhub, named xreki/paddle-android:dev. 
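For reference, a typical way to run this image (a sketch; the bind mount and
script path follow this repository's layout, and the image tag is the one
mentioned above):

    docker run -it --rm -v $PWD:/paddle xreki/paddle-android:dev \
        bash /paddle/paddle/scripts/docker/build_android.sh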
--- Dockerfile.android | 11 +++++++++++ paddle/scripts/docker/build_android.sh | 8 +++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Dockerfile.android b/Dockerfile.android index fa24f6f06c..c0fa58c384 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -14,6 +14,17 @@ RUN apt-get update && \ wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \ apt-get clean -y +# Install Go and glide +RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ + tar -C /usr/local -xzf go.tgz && \ + mkdir /root/gopath && \ + mkdir /root/gopath/bin && \ + mkdir /root/gopath/src && \ + rm go.tgz +ENV GOROOT=/usr/local/go GOPATH=/root/gopath +# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. +ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin + # git credential to skip password typing RUN git config --global credential.helper store diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index bfa10c9155..53e1b818cb 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -2,9 +2,9 @@ set -xe -mkdir -p /paddle/build -cd /paddle/build -rm -f /paddle/install 2>/dev/null || true +mkdir -p /paddle/build_android +cd /paddle/build_android +rm -rf /paddle/install 2>/dev/null || true cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \ -DANDROID_ABI=armeabi-v7a \ @@ -22,5 +22,3 @@ cmake -DCMAKE_SYSTEM_NAME=Android \ make -j `nproc` make install -export PATH=/paddle/install/bin:/paddle/install/opt/paddle/bin:$PATH -paddle version From 6c3027571a5d7f6e54776aa44daf950f0c3134e9 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Fri, 14 Jul 2017 10:08:51 +0000 Subject: [PATCH 10/50] Disable invalid cross-compiling variables for cmake of higher version. 
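With CMake 3.7 and above, the toolchain file defers to CMake's built-in
Android support, so only the CMAKE_ANDROID_* variables valid for the chosen
ABI are set. An arm64 configure step is then expected to mirror the
armeabi-v7a invocation in build_android.sh above (a sketch; the toolchain
path is a placeholder):

    cmake -DCMAKE_SYSTEM_NAME=Android \
          -DANDROID_STANDALONE_TOOLCHAIN=/path/to/arm64-standalone-toolchain \
          -DANDROID_ABI=arm64-v8a \
          ..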
--- cmake/cross_compiling/android.cmake | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cmake/cross_compiling/android.cmake b/cmake/cross_compiling/android.cmake index dcfbc5d012..5e3e437a8d 100644 --- a/cmake/cross_compiling/android.cmake +++ b/cmake/cross_compiling/android.cmake @@ -108,6 +108,7 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") ENDIF() IF(ANDROID_ABI STREQUAL "arm64-v8a") SET(ANDROID_TOOLCHAIN_NAME aarch64-linux-android) + SET(CMAKE_SYSTEM_PROCESSOR aarch64) ENDIF() SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-") ENDIF() @@ -166,7 +167,7 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") ENDIF() IF(ANDROID_ABI STREQUAL "arm64-v8a") - LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a) + LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a) ENDIF() STRING(REPLACE ";" " " ANDROID_COMPILER_FLAGS "${ANDROID_COMPILER_FLAGS}") @@ -193,6 +194,10 @@ ELSE() SET(CMAKE_ANDROID_STANDALONE_TOOLCHAIN ${ANDROID_STANDALONE_TOOLCHAIN}) ENDIF() SET(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ABI}) - SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE}) - SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON}) + IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") + SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE}) + IF(ANDROID_ABI STREQUAL "armeabi-v7a") + SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON}) + ENDIF() + ENDIF() ENDIF() From 8bcd1faffcbe17f1879a18b04bab1bbf5a0eadd2 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 14 Jul 2017 18:12:14 +0800 Subject: [PATCH 11/50] refactor product(DDim ddim) --- paddle/framework/ddim.cc | 15 +++++++++------ paddle/framework/ddim_test.cc | 3 +++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 3f949a6595..a1ae079f4a 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -178,13 +178,16 @@ std::vector vectorize(const DDim& ddim) { return result; } -ssize_t product(const DDim& ddim) { - ssize_t result = 1; - std::vector v = vectorize(ddim); - for (auto i : v) { - result *= i; +struct ProductVisitor : public boost::static_visitor { + template + ssize_t operator()(const Dim& dim) { + return product(dim); } - return result; +}; + +ssize_t product(const DDim& ddim) { + ProductVisitor visitor; + return boost::apply_visitor(visitor, ddim); } ///\cond HIDDEN diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index 36eef02370..8ce7886f8a 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -52,6 +52,9 @@ TEST(DDim, Equality) { // product of a DDim EXPECT_EQ(paddle::framework::product(vddim), 45); + EXPECT_EQ( + paddle::framework::product(paddle::framework::make_ddim({3, 2, 5, 3})), + 90); } TEST(DDim, Print) { From ee90c2d22b2ff72fcc9983583bdb78962cb5ef72 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sat, 15 Jul 2017 12:44:01 +0800 Subject: [PATCH 12/50] add slice_dim draft --- paddle/framework/dim.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/paddle/framework/dim.h b/paddle/framework/dim.h index 883fdc55eb..8dc1bab06d 100644 --- a/paddle/framework/dim.h +++ b/paddle/framework/dim.h @@ -401,5 +401,20 @@ HOSTDEVICE Dim linear_to_dimension(int linear_index, Dim extents) { return result; } +template +Dim slice(const Dim& dim, int begin, int end) { + PADDLE_ENFORCE(begin < end, + "Begin index must be less than end index in Dim slice."); + PADDLE_ENFORCE(begin >= 0 && end <= S && end - begin == D, + "Index error occurs in Dim slice."); + if (begin > 0) { + return 
slice(dim.tail, begin - 1, end - 1); + } + if (D == 1) { + return Dim<1>(dim.head); + } + return Dim(dim.head, slice(dim.tail, 0, end - 1)); +} + } // namespace framework } // namespace paddle From 68adb9541d339ffd0df43a7a45a5a4adf16f2067 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sat, 15 Jul 2017 15:00:18 +0800 Subject: [PATCH 13/50] enbale tensor memory test --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/tensor.h | 50 ++++++++------ paddle/framework/tensor_test.cc | 118 +++++++++++++++++--------------- 3 files changed, 92 insertions(+), 78 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 8415ce67e9..f7f606e4b8 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -2,7 +2,7 @@ cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) -cc_test(tensor_test SRCS tensor_test.cc DEPS ddim) +cc_test(tensor_test SRCS tensor_test.cc DEPS ddim paddle_memory) cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 62e0710a82..81db722c99 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -29,8 +29,6 @@ class Tensor { public: Tensor() : numel_(0), offset_(0) {} - Tensor& operator=(const Tensor& src) = delete; - template const T* data() const { CheckDims(); @@ -39,13 +37,13 @@ class Tensor { } template - T* mutable_data(DDim dims, paddle::platform::Place place) { + T* mutable_data(DDim dims, platform::Place place) { set_dims(dims); return mutable_data(place); } template - T* mutable_data(paddle::platform::Place place) { + T* mutable_data(platform::Place place) { PADDLE_ENFORCE(numel_ > 0, "Tensor::numel_ must be larger than zero to call " "Tensor::mutable_data. 
Call Tensor::set_dim first."); @@ -53,7 +51,18 @@ class Tensor { !(holder_->place() == place) /* some versions of boost::variant don't have operator!= */ || holder_->size() < numel_ * sizeof(T) + offset_) { - holder_.reset(new PlaceholderImpl(place, numel_ * sizeof(T))); + switch (place.which()) { + case 0: + holder_.reset(new PlaceholderImpl( + boost::get(place), numel_ * sizeof(T))); + break; + + case 1: + holder_.reset(new PlaceholderImpl( + boost::get(place), numel_ * sizeof(T))); + break; + } + offset_ = 0; } return reinterpret_cast(reinterpret_cast(holder_->ptr()) + @@ -69,7 +78,7 @@ class Tensor { } template - void CopyFrom(const Tensor& src, paddle::platform::Place dst_place) { + void CopyFrom(const Tensor& src, platform::Place dst_place) { PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) && platform::is_cpu_place(dst_place), "Tensor::CopyFrom only support CPU now."); @@ -119,38 +128,37 @@ class Tensor { struct Placeholder { virtual ~Placeholder() {} virtual void* ptr() const = 0; - virtual paddle::platform::Place place() const = 0; + virtual platform::Place place() const = 0; virtual size_t size() const = 0; }; - template + template struct PlaceholderImpl : public Placeholder { private: + template class Deleter { public: - Deleter(platform::Place place) : place_(place) {} - void operator()(T* ptr) { - paddle::memory::Free(place_, static_cast(ptr)); - } + Deleter(PType place) : place_(place) {} + void operator()(T* ptr) { memory::Free(place_, static_cast(ptr)); } private: - paddle::platform::Place place_; + PType place_; }; public: - PlaceholderImpl(paddle::platform::Place place, size_t size) - : ptr_(static_cast(paddle::memory::Alloc(place, size)), - Deleter(place)), + PlaceholderImpl(PlaceType place, size_t size) + : ptr_(static_cast(memory::Alloc(place, size)), + Deleter(place)), place_(place), size_(size) {} virtual void* ptr() const { return static_cast(ptr_.get()); } virtual size_t size() const { return size_; } - virtual paddle::platform::Place place() const { return place_; } + virtual platform::Place place() const { return place_; } - std::unique_ptr ptr_; - paddle::platform::Place place_; // record the place of ptr_. - size_t size_; // size of the memory block. + std::unique_ptr> ptr_; + platform::Place place_; // record the place of ptr_. + size_t size_; // size of the memory block. }; template @@ -166,7 +174,7 @@ class Tensor { DDim dims_; size_t numel_; // cache of `product(dims_)` size_t offset_; // marks the begin of tensor data area. -}; +}; // namespace framework } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 255f69372f..79bd0cc607 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -47,7 +47,7 @@ TEST(Tensor, DataAssert) { /* following tests are not available at present because Memory::Alloc() and Memory::Free() have not been ready. 
- +*/ TEST(Tensor, MutableData) { using namespace paddle::framework; using namespace paddle::platform; @@ -72,28 +72,29 @@ TEST(Tensor, MutableData) { p2 = src_tensor.mutable_data(make_ddim({2, 2}), CPUPlace()); EXPECT_EQ(p1, p2); } - - { - Tensor src_tensor; - float* p1 = nullptr; - float* p2 = nullptr; - // initialization - p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); - EXPECT_NE(p1, nullptr); - // set src_tensor a new dim with large size - // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(make_ddim({3, 4}), GPUPlace()); - EXPECT_NE(p2, nullptr); - EXPECT_NE(p1, p2); - // set src_tensor a new dim with same size - // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), GPUPlace()); - EXPECT_EQ(p1, p2); - // set src_tensor a new dim with smaller size - // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); - EXPECT_EQ(p1, p2); - } + /* + { + Tensor src_tensor; + float* p1 = nullptr; + float* p2 = nullptr; + // initialization + p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); + EXPECT_NE(p1, nullptr); + // set src_tensor a new dim with large size + // momery is supposed to be re-allocated + p2 = src_tensor.mutable_data(make_ddim({3, 4}), GPUPlace()); + EXPECT_NE(p2, nullptr); + EXPECT_NE(p1, p2); + // set src_tensor a new dim with same size + // momery block is supposed to be unchanged + p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), GPUPlace()); + EXPECT_EQ(p1, p2); + // set src_tensor a new dim with smaller size + // momery block is supposed to be unchanged + p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); + EXPECT_EQ(p1, p2); + } + */ } TEST(Tensor, ShareDataFrom) { @@ -108,9 +109,11 @@ TEST(Tensor, ShareDataFrom) { dst_tensor.ShareDataFrom(src_tensor); } catch (EnforceNotMet err) { caught = true; - std::string msg = "Tenosr holds no memory. Call Tensor::mutable_data -first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); -++i) { ASSERT_EQ(what[i], msg[i]); + std::string msg = + "Tenosr holds no memory. 
Call Tensor::mutable_data first."; + const char* what = err.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); } } ASSERT_TRUE(caught); @@ -120,13 +123,15 @@ first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } - { - Tensor src_tensor; - Tensor dst_tensor; - src_tensor.mutable_data(make_ddim({2, 3, 4}), GPUPlace()); - dst_tensor.ShareDataFrom(src_tensor); - ASSERT_EQ(src_tensor.data(), dst_tensor.data()); - } + /* + { + Tensor src_tensor; + Tensor dst_tensor; + src_tensor.mutable_data(make_ddim({2, 3, 4}), GPUPlace()); + dst_tensor.ShareDataFrom(src_tensor); + ASSERT_EQ(src_tensor.data(), dst_tensor.data()); + } + */ } TEST(Tensor, Slice) { @@ -155,27 +160,29 @@ TEST(Tensor, Slice) { EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address); } - { - Tensor src_tensor; - src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); - Tensor slice_tensor = src_tensor.Slice(2, 6); - DDim slice_dims = slice_tensor.dims(); - ASSERT_EQ(arity(slice_dims), 2); - EXPECT_EQ(slice_dims[0], 4); - EXPECT_EQ(slice_dims[1], 9); - - uintptr_t src_data_address = - reinterpret_cast(src_tensor.data()); - uintptr_t src_mutable_data_address = reinterpret_cast( - src_tensor.mutable_data(src_tensor.dims(), GPUPlace())); - uintptr_t slice_data_address = - reinterpret_cast(slice_tensor.data()); - uintptr_t slice_mutable_data_address = reinterpret_cast( - slice_tensor.mutable_data(slice_tensor.dims(), GPUPlace())); - EXPECT_EQ(src_data_address, src_mutable_data_address); - EXPECT_EQ(slice_data_address, slice_mutable_data_address); - EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); - } + /* + { + Tensor src_tensor; + src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); + Tensor slice_tensor = src_tensor.Slice(2, 6); + DDim slice_dims = slice_tensor.dims(); + ASSERT_EQ(arity(slice_dims), 2); + EXPECT_EQ(slice_dims[0], 4); + EXPECT_EQ(slice_dims[1], 9); + + uintptr_t src_data_address = + reinterpret_cast(src_tensor.data()); + uintptr_t src_mutable_data_address = reinterpret_cast( + src_tensor.mutable_data(src_tensor.dims(), GPUPlace())); + uintptr_t slice_data_address = + reinterpret_cast(slice_tensor.data()); + uintptr_t slice_mutable_data_address = reinterpret_cast( + slice_tensor.mutable_data(slice_tensor.dims(), GPUPlace())); + EXPECT_EQ(src_data_address, src_mutable_data_address); + EXPECT_EQ(slice_data_address, slice_mutable_data_address); + EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); + } + */ } TEST(Tensor, CopyFrom) { @@ -202,5 +209,4 @@ TEST(Tensor, CopyFrom) { for (size_t i = 0; i < 3; ++i) { EXPECT_EQ(dst_ptr[i], slice_ptr[i]); } -} -*/ \ No newline at end of file +} \ No newline at end of file From 66cf21c880fba791910dc449dfc716d11c52803f Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sat, 15 Jul 2017 07:16:11 +0000 Subject: [PATCH 14/50] fix compile error --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/tensor_test.cc | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index f7f606e4b8..b8bfab5320 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -2,7 +2,7 @@ cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) -cc_test(tensor_test SRCS tensor_test.cc DEPS ddim paddle_memory) +cc_test(tensor_test SRCS tensor_test.cc DEPS 
ddim place paddle_memory) cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 79bd0cc607..30b1448a9b 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -72,7 +72,7 @@ TEST(Tensor, MutableData) { p2 = src_tensor.mutable_data(make_ddim({2, 2}), CPUPlace()); EXPECT_EQ(p1, p2); } - /* + #ifdef __CUDACC__ { Tensor src_tensor; float* p1 = nullptr; @@ -94,7 +94,7 @@ TEST(Tensor, MutableData) { p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); EXPECT_EQ(p1, p2); } - */ + #endif } TEST(Tensor, ShareDataFrom) { @@ -123,7 +123,7 @@ TEST(Tensor, ShareDataFrom) { ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } - /* + #ifdef __CUDACC__ { Tensor src_tensor; Tensor dst_tensor; @@ -131,7 +131,7 @@ TEST(Tensor, ShareDataFrom) { dst_tensor.ShareDataFrom(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } - */ + #endif } TEST(Tensor, Slice) { @@ -160,7 +160,7 @@ TEST(Tensor, Slice) { EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address); } - /* + #ifdef __CUDACC__ { Tensor src_tensor; src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); @@ -182,7 +182,7 @@ TEST(Tensor, Slice) { EXPECT_EQ(slice_data_address, slice_mutable_data_address); EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); } - */ + #endif } TEST(Tensor, CopyFrom) { @@ -209,4 +209,4 @@ TEST(Tensor, CopyFrom) { for (size_t i = 0; i < 3; ++i) { EXPECT_EQ(dst_ptr[i], slice_ptr[i]); } -} \ No newline at end of file +} From afa2a88d7896a03feb18b3cf6e6736c8ca79fcad Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sat, 15 Jul 2017 15:25:06 +0800 Subject: [PATCH 15/50] add conditional compilation for tensor --- paddle/framework/tensor.h | 5 ++ paddle/framework/tensor_test.cc | 108 ++++++++++++++++---------------- 2 files changed, 59 insertions(+), 54 deletions(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 81db722c99..29bad7a00a 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -51,6 +51,7 @@ class Tensor { !(holder_->place() == place) /* some versions of boost::variant don't have operator!= */ || holder_->size() < numel_ * sizeof(T) + offset_) { +#ifdef __CUDACC__ switch (place.which()) { case 0: holder_.reset(new PlaceholderImpl( @@ -62,6 +63,10 @@ class Tensor { boost::get(place), numel_ * sizeof(T))); break; } +#else + holder_.reset(new PlaceholderImpl( + boost::get(place), numel_ * sizeof(T))); +#endif offset_ = 0; } diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 30b1448a9b..84c6f0cf65 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -72,29 +72,29 @@ TEST(Tensor, MutableData) { p2 = src_tensor.mutable_data(make_ddim({2, 2}), CPUPlace()); EXPECT_EQ(p1, p2); } - #ifdef __CUDACC__ - { - Tensor src_tensor; - float* p1 = nullptr; - float* p2 = nullptr; - // initialization - p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); - EXPECT_NE(p1, nullptr); - // set src_tensor a new dim with large size - // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(make_ddim({3, 4}), GPUPlace()); - EXPECT_NE(p2, nullptr); - EXPECT_NE(p1, p2); - // set src_tensor a new dim with same size - // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), GPUPlace()); - EXPECT_EQ(p1, p2); - // set src_tensor a new dim with 
smaller size - // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); - EXPECT_EQ(p1, p2); - } - #endif +#ifdef __CUDACC__ + { + Tensor src_tensor; + float* p1 = nullptr; + float* p2 = nullptr; + // initialization + p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); + EXPECT_NE(p1, nullptr); + // set src_tensor a new dim with large size + // momery is supposed to be re-allocated + p2 = src_tensor.mutable_data(make_ddim({3, 4}), GPUPlace()); + EXPECT_NE(p2, nullptr); + EXPECT_NE(p1, p2); + // set src_tensor a new dim with same size + // momery block is supposed to be unchanged + p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), GPUPlace()); + EXPECT_EQ(p1, p2); + // set src_tensor a new dim with smaller size + // momery block is supposed to be unchanged + p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); + EXPECT_EQ(p1, p2); + } +#endif } TEST(Tensor, ShareDataFrom) { @@ -123,15 +123,15 @@ TEST(Tensor, ShareDataFrom) { ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } - #ifdef __CUDACC__ - { - Tensor src_tensor; - Tensor dst_tensor; - src_tensor.mutable_data(make_ddim({2, 3, 4}), GPUPlace()); - dst_tensor.ShareDataFrom(src_tensor); - ASSERT_EQ(src_tensor.data(), dst_tensor.data()); - } - #endif +#ifdef __CUDACC__ + { + Tensor src_tensor; + Tensor dst_tensor; + src_tensor.mutable_data(make_ddim({2, 3, 4}), GPUPlace()); + dst_tensor.ShareDataFrom(src_tensor); + ASSERT_EQ(src_tensor.data(), dst_tensor.data()); + } +#endif } TEST(Tensor, Slice) { @@ -160,29 +160,29 @@ TEST(Tensor, Slice) { EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address); } - #ifdef __CUDACC__ - { - Tensor src_tensor; - src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); - Tensor slice_tensor = src_tensor.Slice(2, 6); - DDim slice_dims = slice_tensor.dims(); - ASSERT_EQ(arity(slice_dims), 2); - EXPECT_EQ(slice_dims[0], 4); - EXPECT_EQ(slice_dims[1], 9); +#ifdef __CUDACC__ + { + Tensor src_tensor; + src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); + Tensor slice_tensor = src_tensor.Slice(2, 6); + DDim slice_dims = slice_tensor.dims(); + ASSERT_EQ(arity(slice_dims), 2); + EXPECT_EQ(slice_dims[0], 4); + EXPECT_EQ(slice_dims[1], 9); - uintptr_t src_data_address = - reinterpret_cast(src_tensor.data()); - uintptr_t src_mutable_data_address = reinterpret_cast( - src_tensor.mutable_data(src_tensor.dims(), GPUPlace())); - uintptr_t slice_data_address = - reinterpret_cast(slice_tensor.data()); - uintptr_t slice_mutable_data_address = reinterpret_cast( - slice_tensor.mutable_data(slice_tensor.dims(), GPUPlace())); - EXPECT_EQ(src_data_address, src_mutable_data_address); - EXPECT_EQ(slice_data_address, slice_mutable_data_address); - EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); - } - #endif + uintptr_t src_data_address = + reinterpret_cast(src_tensor.data()); + uintptr_t src_mutable_data_address = reinterpret_cast( + src_tensor.mutable_data(src_tensor.dims(), GPUPlace())); + uintptr_t slice_data_address = + reinterpret_cast(slice_tensor.data()); + uintptr_t slice_mutable_data_address = reinterpret_cast( + slice_tensor.mutable_data(slice_tensor.dims(), GPUPlace())); + EXPECT_EQ(src_data_address, src_mutable_data_address); + EXPECT_EQ(slice_data_address, slice_mutable_data_address); + EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); + } +#endif } TEST(Tensor, CopyFrom) { From 9e0c6800c53701fc50dfb69a2c8b6de19c52c559 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 15 Jul 2017 
20:18:54 +0800
Subject: [PATCH 16/50] Python: generate op creation methods by OpProto

All op creation methods are generated by the
`create_op_creation_methods.__bootstrap__` method and stored on the
`op_creations` object as its methods.

There are three parts to this feature.

1. Get all registered `OpProto` from the C++ side. It is implemented in
   the `get_all_op_protos` method.
1. Create a function to convert `kwargs` to `OpDesc` based on each op's
   `OpProto`. This is the `OpDescCreationMethod` class.
1. Convert `OpProto` to a `docstring` by the `get_docstring_from_op_proto`
   method.

All three methods are unit tested. `__bootstrap__` just combines them and
creates the methods at runtime.

For details, please refer to the docstrings in
`create_op_creation_methods.py` and the unit test
`test_op_creation_methods.py`.

---
 paddle/framework/op_registry.h                |  24 ++
 paddle/framework/operator.cc                  |  28 +-
 paddle/framework/operator.h                   |   8 +-
 paddle/pybind/pybind.cc                       |  17 ++
 .../framework/create_op_creation_methods.py   | 235 +++++++++++++++++
 .../tests/test_op_creation_methods.py         | 243 +++++++++++++++++-
 python/paddle/v2/optimizer.py                 |   2 +
 7 files changed, 539 insertions(+), 18 deletions(-)

diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index de20e7af05..3d67541db2 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -1,6 +1,7 @@
 #pragma once

 #include
+#include <atomic>
 #include
 #include
 #include
@@ -199,8 +200,12 @@ class OpRegistry {
   }

   static OperatorPtr CreateOp(const OpDesc& op_desc) {
+    //! Create an operator instance by type.
     std::string op_type = op_desc.type();
     OperatorPtr op(creators().at(op_type)());
+
+    //! Fill the op's data members. We do not use a constructor because it
+    //! would be noisy for Op developers.
     op->desc_ = op_desc;
     op->inputs_.reserve((size_t)op_desc.inputs_size());
     std::copy(op_desc.inputs().begin(), op_desc.inputs().end(),
@@ -208,10 +213,18 @@ class OpRegistry {
     op->outputs_.reserve((size_t)op_desc.outputs_size());
     std::copy(op_desc.outputs().begin(), op_desc.outputs().end(),
               std::back_inserter(op->outputs_));
+
+    //! Fill attrs, and validate them.
     for (auto& attr : op_desc.attrs()) {
       op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr);
     }
     op_checkers().at(op_type).Check(op->attrs_);
+
+    //! Convert temporary variable names to unique variable names.
+    AssignTempVariable(op.get());
+
+    //! Run the op's custom Init for a complex Op; a simple Op's Init
+    //! method does nothing.
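+
+    //! For reference, a minimal OpDesc that reaches this point (an
+    //! illustrative sketch using the protobuf-generated setters; assumes
+    //! the add_two op registered in an earlier patch, whose proto declares
+    //! inputs X, Y and output Out):
+    //!   OpDesc desc;
+    //!   desc.set_type("add_two");
+    //!   desc.add_inputs("x");
+    //!   desc.add_inputs("y");
+    //!   desc.add_outputs("out");
+    //!   OperatorPtr op = OpRegistry::CreateOp(desc);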
op->Init(); return op; } @@ -222,6 +235,17 @@ class OpRegistry { }; private: + static void AssignTempVariable(OperatorBase* op) { + static std::atomic gUniqId(0UL); + for (auto& outname : op->outputs_) { + if (outname == OperatorBase::TMP_VAR_NAME()) { + outname += op->Type(); + outname += "@"; + outname += std::to_string(gUniqId.fetch_add(1)); + } + } + } + static std::unordered_map& creators() { static std::unordered_map creators_; return creators_; diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index d065670829..a467d328e1 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -19,23 +19,21 @@ namespace framework { std::string OperatorBase::DebugString() const { std::stringstream ss; - ss << "=================\n"; - ss << "type = " << desc_.type() << "\n"; - ss << "inputs = ["; - for (auto& ipt : inputs_) { - ss << ipt << ", "; + ss << "Op(" << Type() << "), inputs:("; + for (size_t i = 0; i < inputs_.size(); ++i) { + ss << inputs_[i]; + if (i != inputs_.size() - 1) { + ss << ", "; + } } - ss << "]\n"; - ss << "outputs = ["; - for (auto& opt : outputs_) { - ss << opt << ", "; + ss << "), outputs:("; + for (size_t i = 0; i < outputs_.size(); ++i) { + ss << outputs_[i]; + if (i != outputs_.size() - 1) { + ss << ", "; + } } - ss << "]\n"; - ss << "attr_keys = ["; - for (auto& attr : attrs_) { - ss << attr.first << ", "; - } - ss << "]\n"; + ss << ")."; return ss.str(); } diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index cf79f379fa..cc166048b7 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -39,6 +39,13 @@ using OperatorPtr = std::shared_ptr; */ class OperatorBase { public: + /// If a variable is a empty variable, that name will be used. + static std::string EMPTY_VAR_NAME() { return "@EMPTY@"; } + + /// If a variable is a temporary variable, that name will be set in Python, + /// but it will be convert to a unique name in scope after OpCreator. + static std::string TMP_VAR_NAME() { return "@TEMP@"; } + virtual ~OperatorBase() {} template @@ -62,7 +69,6 @@ class OperatorBase { virtual void Run(const ScopePtr& scope, const platform::DeviceContext& dev_ctx) const = 0; - protected: std::string Type() const { return desc_.type(); } public: diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index c1a025ed04..b5ead21fd0 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -63,6 +63,23 @@ All parameter, weight, gradient are variables in Paddle. 
} return ret_values; }); + m.def_submodule( + "var_names", + "The module will return special predefined variable name in Paddle") + .def("empty", pd::OperatorBase::EMPTY_VAR_NAME) + .def("temp", pd::OperatorBase::TMP_VAR_NAME); + + py::class_(m, "Operator") + .def("__str__", &pd::OperatorBase::DebugString) + .def_static("create", [](const std::string& protobin) { + pd::OpDesc desc; + PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), + "Cannot parse user input to OpDesc"); + PADDLE_ENFORCE(desc.IsInitialized(), + "User OpDesc is not initialized, reason %s", + desc.InitializationErrorString()); + return pd::OpRegistry::CreateOp(desc); + }); return m.ptr(); } diff --git a/python/paddle/v2/framework/create_op_creation_methods.py b/python/paddle/v2/framework/create_op_creation_methods.py index 2fcdfead25..c2a7ae7692 100644 --- a/python/paddle/v2/framework/create_op_creation_methods.py +++ b/python/paddle/v2/framework/create_op_creation_methods.py @@ -1,11 +1,246 @@ import paddle.v2.framework.core as core import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 +import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 +import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2 +import cStringIO def get_all_op_protos(): + """ + Get all registered op proto from Paddle C++ + :return: list of OpProto + """ protostrs = core.get_all_op_protos() ret_values = [] for pbstr in protostrs: op_proto = op_proto_pb2.OpProto.FromString(str(pbstr)) ret_values.append(op_proto) return ret_values + + +class OpDescCreationMethod(object): + """ + A Functor object to convert user input(use key word args) to OpDesc based on + OpProto. + + :param op_proto: The OpProto object. + :type op_proto: op_proto_pb2.OpProto + """ + + def __init__(self, op_proto): + if not isinstance(op_proto, op_proto_pb2.OpProto): + raise TypeError("Argument should be OpProto") + self.__op_proto__ = op_proto + + def __call__(self, *args, **kwargs): + """ + Convert user input to OpDesc. Only key-word args are supported. 
+        :return: OpDesc based on user input
+        :rtype: op_desc_pb2.OpDesc
+        """
+        if len(args) != 0:
+            raise ValueError("Only keyword arguments are supported by Paddle")
+        op_desc = op_desc_pb2.OpDesc()
+
+        # Inputs
+        ipts, ipt_format, _ = OpDescCreationMethod.extract_input_or_output(
+            "input", kwargs, self.__op_proto__.inputs)
+        op_desc.inputs.extend(ipts)
+        if ipt_format is not None:
+            op_desc.attrs.extend([ipt_format])
+
+        # Outputs
+        outs, out_format, tmp_index = OpDescCreationMethod.extract_input_or_output(
+            "output", kwargs, self.__op_proto__.outputs)
+        op_desc.outputs.extend(outs)
+        if out_format is not None:
+            op_desc.attrs.extend([out_format])
+        if len(tmp_index) != 0:
+            tmp_index_attr = op_desc.attrs.add()
+            tmp_index_attr.type = attr_type_pb2.INTS
+            tmp_index_attr.name = "temporary_index"
+            tmp_index_attr.ints.extend(tmp_index)
+
+        # Types
+        op_desc.type = self.__op_proto__.type
+
+        # Attrs
+        for attr in self.__op_proto__.attrs:
+            if attr.generated:
+                continue
+            user_defined_attr = kwargs.get(attr.name, None)
+            if user_defined_attr is not None:
+                new_attr = op_desc.attrs.add()
+                new_attr.name = attr.name
+                new_attr.type = attr.type
+                if attr.type == attr_type_pb2.INT:
+                    new_attr.i = user_defined_attr
+                elif attr.type == attr_type_pb2.FLOAT:
+                    new_attr.f = user_defined_attr
+                elif attr.type == attr_type_pb2.STRING:
+                    new_attr.s = user_defined_attr
+                elif attr.type == attr_type_pb2.INTS:
+                    new_attr.ints.extend(user_defined_attr)
+                elif attr.type == attr_type_pb2.FLOATS:
+                    new_attr.floats.extend(user_defined_attr)
+                elif attr.type == attr_type_pb2.STRINGS:
+                    new_attr.strings.extend(user_defined_attr)
+                else:
+                    raise NotImplementedError("Unsupported attribute type " +
+                                              str(attr.type))
+
+        return op_desc
+
+    @staticmethod
+    def extract_input_or_output(in_out, kwargs, meta):
+        """
+        Extract input or output variable names from keyword arguments,
+        based on VarProtos.
+
+        :param in_out: "input" or "output"
+        :param kwargs: keyword arguments provided by the user.
+        :param meta: a list of VarProto
+        :return: Three objects are returned: the variable names; the
+        input_format or output_format attribute (None if the input or output
+        is not multiple); and the temporary variable index list.
+        """
+        multiple = OpDescCreationMethod.any_is_true((m.multiple for m in meta))
+        tmp_index = []
+        retv = []
+        if multiple:
+            var_format = op_desc_pb2.AttrDesc()
+            var_format.type = attr_type_pb2.INTS
+            var_format.name = "%s_format" % in_out
+            var_format.ints.append(0)
+
+            for var in meta:
+                var_name = var.name
+
+                if var.temporary:
+                    var_name = [core.var_names.temp()]
+                    tmp_index.append(len(retv))
+                else:
+                    var_name = kwargs.get(var_name, [])
+                if not isinstance(var_name, list):
+                    var_name = [var_name]
+                retv.extend(var_name)
+                var_format.ints.append(len(var_name) + var_format.ints[-1])
+            return retv, var_format, tmp_index
+        else:
+            for var in meta:
+                if var.temporary:
+                    retv.append(kwargs.get(var.name, core.var_names.temp()))
+                    tmp_index.append(len(retv))
+                else:
+                    retv.append(kwargs.get(var.name, core.var_names.empty()))
+            return retv, None, tmp_index
+
+    @staticmethod
+    def any_is_true(generator):
+        """
+        Reduce a bool array to one. If any of them is True, then return True.
+        """
+        for flag in generator:
+            if flag:
+                return True
+        return False
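+
+
+# A usage sketch (the "fc" proto and the names are illustrative, mirroring
+# the unit tests):
+#
+#     method = OpDescCreationMethod(fc_op_proto)
+#     op_desc = method(X=["x1", "x2", "x3"], W=["w1", "w2", "w3"], b="b", Y="y")
+#
+# op_desc is then an op_desc_pb2.OpDesc whose "input_format" attribute records
+# the offsets of the multiple inputs, here [0, 3, 6, 7].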
+
+
+def get_docstring_from_op_proto(op_proto):
+    """
+    Generate docstring from an OpProto
+    :param op_proto: an OpProto instance.
+    :type op_proto: op_proto_pb2.OpProto
+    :return: docstring
+    """
+    if not isinstance(op_proto, op_proto_pb2.OpProto):
+        raise TypeError("Input must be OpProto")
+    f = cStringIO.StringIO()
+    f.write(op_proto.comment)
+    f.write("\n")
+
+    def __append_param__(name, comment, type):
+        # Maybe replacing the following lines with a template engine would be
+        # better.
+        f.write(":param ")
+        f.write(name)
+        f.write(": ")
+        f.write(comment)
+        f.write("\n")
+        f.write(":type ")
+        f.write(name)
+        f.write(": ")
+        f.write(type)
+        f.write("\n")
+
+    for ipt in op_proto.inputs:
+        __append_param__(ipt.name, ipt.comment, "list | basestr"
+                         if ipt.multiple else "basestr")
+
+    temp_var_prefix = \
+        "This is a temporary variable. It does not have to be set by user. "
+    for opt in op_proto.outputs:
+        __append_param__(opt.name, opt.comment if not opt.temporary else
+                         temp_var_prefix + opt.comment, "list | basestr"
+                         if opt.multiple else "basestr")
+
+    for attr in op_proto.attrs:
+        attr_type = None
+        if attr.type == attr_type_pb2.INT:
+            attr_type = "int"
+        elif attr.type == attr_type_pb2.FLOAT:
+            attr_type = "float"
+        elif attr.type == attr_type_pb2.STRING:
+            attr_type = "basestr"
+        elif attr.type == attr_type_pb2.INTS:
+            attr_type = "list of int"
+        elif attr.type == attr_type_pb2.FLOATS:
+            attr_type = "list of float"
+        elif attr.type == attr_type_pb2.STRINGS:
+            attr_type = "list of basestr"
+
+        if attr_type is None:
+            raise RuntimeError("Unsupported attribute type " + str(attr.type))
+
+        __append_param__(attr.name, attr.comment, attr_type)
+
+    return f.getvalue()
+
+
+def create_op_creation_method(op_proto):
+    """
+    Generate op creation method for an OpProto
+    """
+    method = OpDescCreationMethod(op_proto)
+
+    def __impl__(*args, **kwargs):
+        opdesc = method(*args, **kwargs)
+        return core.Operator.create(opdesc.SerializeToString())
+
+    __impl__.__doc__ = get_docstring_from_op_proto(op_proto)
+    return __impl__
+
+
+class OpCreationsHolder(object):
+    """
+    An object that holds all op creation methods.
+
+    Use `op_creations.xxx_op` to access them.
+    """
+    pass
+
+
+op_creations = OpCreationsHolder()
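+# After __bootstrap__() below has run, each registered op is reachable here,
+# e.g. (assuming an add_two op is registered, as the unit tests do):
+#
+#     add_op = op_creations.add_two(X="a", Y="b", Out="z")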
+ """ + for op_proto in get_all_op_protos(): + func = create_op_creation_method(op_proto) + func.__name__ = str(op_proto.type) + setattr(op_creations, func.__name__, func) + + +__bootstrap__() diff --git a/python/paddle/v2/framework/tests/test_op_creation_methods.py b/python/paddle/v2/framework/tests/test_op_creation_methods.py index b205e2cabb..41db7c0d53 100644 --- a/python/paddle/v2/framework/tests/test_op_creation_methods.py +++ b/python/paddle/v2/framework/tests/test_op_creation_methods.py @@ -1,9 +1,13 @@ import unittest import paddle.v2.framework.create_op_creation_methods as creation +import paddle.v2.framework.core as core +import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 +import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 +import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2 -class TestOpCreationsMethods(unittest.TestCase): - def test_all_protos(self): +class TestGetAllProtos(unittest.TestCase): + def test_all(self): all_protos = creation.get_all_op_protos() self.assertNotEqual(0, len(all_protos)) @@ -11,5 +15,240 @@ class TestOpCreationsMethods(unittest.TestCase): self.assertTrue(each.IsInitialized()) +class TestOpDescCreationMethod(unittest.TestCase): + def test_plain_input_output(self): + op = op_proto_pb2.OpProto() + op.type = "test" + ipt = op.inputs.add() + ipt.name = "X" + ipt.comment = "not matter" + + ipt = op.inputs.add() + ipt.name = "Y" + ipt.comment = "not matter" + + opt = op.outputs.add() + opt.name = "Z" + opt.comment = "not matter" + + op.comment = "not matter" + + self.assertTrue(op.IsInitialized()) + + method = creation.OpDescCreationMethod(op) + output = method(X="a", Y="b", Z="c") + + expected = op_desc_pb2.OpDesc() + expected.type = "test" + expected.inputs.extend(["a", "b"]) + expected.outputs.append("c") + self.assertEqual(expected, output) + + def test_multiple_input_plain_output(self): + op = op_proto_pb2.OpProto() + op.type = "fc" + ipt = op.inputs.add() + ipt.name = "X" + ipt.comment = "" + ipt.multiple = True + + ipt = op.inputs.add() + ipt.name = "W" + ipt.comment = "" + ipt.multiple = True + + ipt = op.inputs.add() + ipt.name = "b" + ipt.comment = "" + + out = op.outputs.add() + out.name = "Y" + out.comment = "" + + op.comment = "" + self.assertTrue(op.IsInitialized()) + method = creation.OpDescCreationMethod(op) + + generated1 = method(X="x", W="w", b="b", Y="y") + expected1 = op_desc_pb2.OpDesc() + expected1.inputs.extend(['x', 'w', 'b']) + expected1.outputs.extend(['y']) + expected1.type = 'fc' + attr = expected1.attrs.add() + attr.name = 'input_format' + attr.type = attr_type_pb2.INTS + attr.ints.extend([0, 1, 2, 3]) + self.assertEqual(expected1, generated1) + + generated2 = method( + X=['x1', 'x2', 'x3'], b='b', W=['w1', 'w2', 'w3'], Y='y') + expected2 = op_desc_pb2.OpDesc() + expected2.inputs.extend(['x1', 'x2', 'x3', 'w1', 'w2', 'w3', 'b']) + expected2.outputs.extend(['y']) + expected2.type = 'fc' + attr = expected2.attrs.add() + attr.name = 'input_format' + attr.type = attr_type_pb2.INTS + attr.ints.extend([0, 3, 6, 7]) + self.assertEqual(expected2, generated2) + + def test_attrs(self): + op = op_proto_pb2.OpProto() + op.type = "test" + ipt = op.inputs.add() + ipt.name = 'X' + ipt.comment = "" + + def __add_attr__(name, type): + attr = op.attrs.add() + attr.name = name + attr.comment = "" + attr.type = type + + __add_attr__("int_attr", attr_type_pb2.INT) + __add_attr__("float_attr", attr_type_pb2.FLOAT) + __add_attr__("string_attr", attr_type_pb2.STRING) + __add_attr__("ints_attr", attr_type_pb2.INTS) + 
__add_attr__("floats_attr", attr_type_pb2.FLOATS) + __add_attr__("strings_attr", attr_type_pb2.STRINGS) + + op.comment = "" + self.assertTrue(op.IsInitialized()) + + method = creation.OpDescCreationMethod(op) + + generated = method( + X="a", + int_attr=10, + float_attr=3.2, + string_attr="test_str", + ints_attr=[0, 1, 2, 3, 4], + floats_attr=[0.2, 3.2, 4.5], + strings_attr=["a", "b", "c"]) + + expected = op_desc_pb2.OpDesc() + expected.type = "test" + expected.inputs.extend(['a']) + attr = expected.attrs.add() + attr.name = "int_attr" + attr.type = attr_type_pb2.INT + attr.i = 10 + + attr = expected.attrs.add() + attr.name = "float_attr" + attr.type = attr_type_pb2.FLOAT + attr.f = 3.2 + + attr = expected.attrs.add() + attr.name = "string_attr" + attr.type = attr_type_pb2.STRING + attr.s = "test_str" + + attr = expected.attrs.add() + attr.name = "ints_attr" + attr.type = attr_type_pb2.INTS + attr.ints.extend([0, 1, 2, 3, 4]) + + attr = expected.attrs.add() + attr.name = 'floats_attr' + attr.type = attr_type_pb2.FLOATS + attr.floats.extend([0.2, 3.2, 4.5]) + + attr = expected.attrs.add() + attr.name = 'strings_attr' + attr.type = attr_type_pb2.STRINGS + attr.strings.extend(['a', 'b', 'c']) + + self.assertEqual(expected, generated) + + def test_input_temporary_output(self): + op = op_proto_pb2.OpProto() + op.type = "test" + out = op.outputs.add() + out.name = "OUT" + out.comment = "" + + out = op.outputs.add() + out.name = "TMP" + out.comment = "" + out.temporary = True + + out = op.outputs.add() + out.name = "OUT2" + out.comment = "" + op.comment = "" + + method = creation.OpDescCreationMethod(op) + generated = method(OUT="a", OUT2="b") + desc = op_desc_pb2.OpDesc() + desc.outputs.extend(["a", core.var_names.temp(), "b"]) + desc.type = "test" + attr = desc.attrs.add() + attr.name = "temporary_index" + attr.type = attr_type_pb2.INTS + attr.ints.append(2) + self.assertEqual(generated, desc) + + +class TestOpCreationDocStr(unittest.TestCase): + def test_all(self): + op = op_proto_pb2.OpProto() + op.type = "test" + op.comment = """Test Op. + +This op is used for unit test, not a real op. +""" + a = op.inputs.add() + a.name = "a" + a.comment = "Input a for test op" + a.multiple = True + + b = op.inputs.add() + b.name = "b" + b.comment = "Input b for test op" + self.assertTrue(op.IsInitialized()) + + o1 = op.outputs.add() + o1.name = "output" + o1.comment = "The output of test op" + + o2 = op.outputs.add() + o2.name = "temp output" + o2.comment = "The temporary output of test op" + o2.temporary = True + + test_str = op.attrs.add() + test_str.name = "str_attr" + test_str.type = attr_type_pb2.STRING + test_str.comment = "A string attribute for test op" + + actual = creation.get_docstring_from_op_proto(op) + expected_docstring = '''Test Op. + +This op is used for unit test, not a real op. + +:param a: Input a for test op +:type a: list | basestr +:param b: Input b for test op +:type b: basestr +:param output: The output of test op +:type output: basestr +:param temp output: This is a temporary variable. It does not have to set by user. 
The temporary output of test op
+:type temp output: basestr
+:param str_attr: A string attribute for test op
+:type str_attr: basestr
+'''
+        self.assertEqual(expected_docstring, actual)
+
+
+class TestOpCreations(unittest.TestCase):
+    def test_all(self):
+        add_op = creation.op_creations.add_two(X="a", Y="b", Out="z")
+        self.assertIsNotNone(add_op)
+        # Invoke C++ DebugString()
+        self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).',
+                         str(add_op))
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py
index b6ee51cfe8..173a30a411 100644
--- a/python/paddle/v2/optimizer.py
+++ b/python/paddle/v2/optimizer.py
@@ -25,6 +25,8 @@ class Optimizer(object):
         self.__opt_conf_proto__ = config_parser_utils.parse_optimizer_config(
             __impl__)
+        if swig_api is None:
+            raise RuntimeError("paddle.v2 currently need swig_paddle")
         self.__opt_conf__ = swig_api.OptimizationConfig.createFromProto(
             self.__opt_conf_proto__)

From d3a749a5bfb32c61b9faa24424d36bb0fa471edb Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Sun, 16 Jul 2017 11:13:39 +0800
Subject: [PATCH 17/50] CMake `op_library` function

* It is used to create an operator library. It handles splitting CPU and
  GPU sources and links common operator libraries.
* It also gives a reasonable warning or error when an operator developer
  does not correctly implement an operator.
  * Warning for lack of GPU kernel.
* Same interface as `cc_library` to make code style consistent.
---
 paddle/operators/CMakeLists.txt | 48 +++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index 40bb326512..b2ea8eb344 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -1,6 +1,44 @@
-if(WITH_GPU)
-    nv_library(add_op SRCS add_op.cc add_op.cu DEPS operator op_registry glog ddim)
-else()
-    cc_library(add_op SRCS add_op.cc DEPS operator op_registry glog ddim)
-endif()
+function(op_library TARGET)
+  # op_library is a function to create an op library. The interface is the
+  # same as cc_library, but it handles splitting GPU/CPU code and linking
+  # common libraries for ops.
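+  #
+  # Usage is the same shape as cc_library, e.g. (mirroring the call at the
+  # bottom of this file):
+  #   op_library(add_op SRCS add_op.cc add_op.cu)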
+  set(cc_srcs)
+  set(cu_srcs)
+  set(op_common_deps operator op_registry glog ddim)
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(op_library "${options}" "${oneValueArgs}"
+          "${multiValueArgs}" ${ARGN})
+
+  foreach(src ${op_library_SRCS})
+      if (${src} MATCHES ".*\\.cu$")
+          list(APPEND cu_srcs ${src})
+      elseif(${src} MATCHES ".*\\.cc$")
+          list(APPEND cc_srcs ${src})
+      else()
+          message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu")
+      endif()
+  endforeach()
+
+  list(LENGTH cc_srcs cc_srcs_len)
+  if (${cc_srcs_len} EQUAL 0)
+      message(FATAL_ERROR "The op library ${TARGET} should contain at least one .cc file")
+  endif()
+
+  list(LENGTH cu_srcs cu_srcs_len)
+  if (${cu_srcs_len} EQUAL 0)
+      message(WARNING "The op library ${TARGET} does not support GPU!")
+  endif()
+
+  if (WITH_GPU)
+    nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
+            ${op_common_deps})
+  else()
+    cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${op_library_DEPS}
+            ${op_common_deps})
+  endif()
+endfunction()
+
+op_library(add_op SRCS add_op.cc add_op.cu)
 cc_test(add_op_test SRCS add_op_test.cc DEPS add_op)

From 85c4f488f32d5ca0dfc420f4f7b29fb5a969d946 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Sun, 16 Jul 2017 15:17:31 +0800
Subject: [PATCH 18/50] Refactor DDim's product() and add slice_ddim()

1. Refactor DDim's product() to make it more efficient.
2. Add slice_ddim().
---
 paddle/framework/ddim.cc      |  41 +++++++++
 paddle/framework/ddim.h       |   2 +
 paddle/framework/ddim_test.cc |  17 ++++
 paddle/framework/dim.h        |  15 ----
 paddle/framework/dim_test.cu  | 163 +++++++++++++++++-----------------
 5 files changed, 142 insertions(+), 96 deletions(-)

diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc
index a1ae079f4a..c898b6e322 100644
--- a/paddle/framework/ddim.cc
+++ b/paddle/framework/ddim.cc
@@ -1,4 +1,5 @@
 #include "paddle/framework/ddim.h"
+#include "paddle/framework/enforce.h"

 namespace paddle {
 namespace framework {
@@ -190,6 +191,46 @@ ssize_t product(const DDim& ddim) {
   return boost::apply_visitor(visitor, ddim);
 }

+struct SliceVectorizeVisitor : public boost::static_visitor<> {
+  std::vector<int>& vector;
+  int begin;
+  int end;
+
+  SliceVectorizeVisitor(std::vector<int>& v, int b, int e)
+      : vector(v), begin(b), end(e) {
+    PADDLE_ENFORCE(begin < end,
+                   "Begin index must be less than end index in ddim slice.");
+    PADDLE_ENFORCE(begin >= 0,
+                   "Begin index can't be less than zero in ddim slice.");
+  }
+
+  template <int S>
+  void operator()(const Dim<S>& dim) {
+    if (begin == 0) {
+      vector.push_back(dim.head);
+    } else {
+      --begin;
+    }
+    --end;
+    if (end > 0) {
+      this->operator()(dim.tail);
+    }
+  }
+
+  void operator()(const Dim<1>& dim) {
+    PADDLE_ENFORCE(end == 1, "End index in ddim slice is out of bound.");
+    vector.push_back(dim.head);
+  }
+};
+
+DDim slice_ddim(const DDim& dim, int begin, int end) {
+  std::vector<int> vec;
+  vec.reserve(end - begin);
+  SliceVectorizeVisitor visitor(vec, begin, end);
+  boost::apply_visitor(visitor, dim);
+  return make_ddim(vec);
+}
+
 ///\cond HIDDEN

 struct ArityVisitor : boost::static_visitor<int> {
diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h
index 223c4180be..675f8680f6 100644
--- a/paddle/framework/ddim.h
+++ b/paddle/framework/ddim.h
@@ -81,6 +81,8 @@ std::vector vectorize(const DDim& ddim);

 ssize_t product(const DDim& ddim);

+DDim slice_ddim(const DDim& dim, int begin, int end);
+
 /**
  * \brief What is the length of this dimension?
* diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index 8ce7886f8a..408905b00b 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -55,6 +55,23 @@ TEST(DDim, Equality) { EXPECT_EQ( paddle::framework::product(paddle::framework::make_ddim({3, 2, 5, 3})), 90); + + // slice a DDim + paddle::framework::DDim ddim2 = + paddle::framework::make_ddim({1, 2, 3, 4, 5, 6}); + paddle::framework ::DDim ss = paddle::framework::slice_ddim(ddim2, 2, 5); + EXPECT_EQ(arity(ss), 3); + EXPECT_EQ(ss[0], 3); + EXPECT_EQ(ss[1], 4); + EXPECT_EQ(ss[2], 5); + paddle::framework ::DDim ss2 = paddle::framework::slice_ddim(ddim2, 0, 6); + EXPECT_EQ(arity(ss2), 6); + EXPECT_EQ(ss2[0], 1); + EXPECT_EQ(ss2[1], 2); + EXPECT_EQ(ss2[2], 3); + EXPECT_EQ(ss2[3], 4); + EXPECT_EQ(ss2[4], 5); + EXPECT_EQ(ss2[5], 6); } TEST(DDim, Print) { diff --git a/paddle/framework/dim.h b/paddle/framework/dim.h index 8dc1bab06d..883fdc55eb 100644 --- a/paddle/framework/dim.h +++ b/paddle/framework/dim.h @@ -401,20 +401,5 @@ HOSTDEVICE Dim linear_to_dimension(int linear_index, Dim extents) { return result; } -template -Dim slice(const Dim& dim, int begin, int end) { - PADDLE_ENFORCE(begin < end, - "Begin index must be less than end index in Dim slice."); - PADDLE_ENFORCE(begin >= 0 && end <= S && end - begin == D, - "Index error occurs in Dim slice."); - if (begin > 0) { - return slice(dim.tail, begin - 1, end - 1); - } - if (D == 1) { - return Dim<1>(dim.head); - } - return Dim(dim.head, slice(dim.tail, 0, end - 1)); -} - } // namespace framework } // namespace paddle diff --git a/paddle/framework/dim_test.cu b/paddle/framework/dim_test.cu index 0521741519..3898d0a447 100644 --- a/paddle/framework/dim_test.cu +++ b/paddle/framework/dim_test.cu @@ -1,100 +1,101 @@ #include #include -#include "paddle/framework/dim.h" #include "gtest/gtest.h" +#include "paddle/framework/dim.h" __global__ void test(paddle::framework::Dim<2>* o) { - o[0] = paddle::framework::make_dim(5, 6); + o[0] = paddle::framework::make_dim(5, 6); } __global__ void dyn_idx_gpu(int* o) { - auto d = paddle::framework::make_dim(5, 6); - o[0] = d[1]; + auto d = paddle::framework::make_dim(5, 6); + o[0] = d[1]; } TEST(Dim, Equality) { - // construct a Dim on the CPU - auto a = paddle::framework::make_dim(3, 4); - EXPECT_EQ(paddle::framework::get<0>(a), 3); - EXPECT_EQ(paddle::framework::get<1>(a), 4); - - // construct a Dim on the GPU - thrust::device_vector> t(2); - test<<<1,1>>>(thrust::raw_pointer_cast(t.data())); - a = t[0]; - EXPECT_EQ(paddle::framework::get<0>(a), 5); - EXPECT_EQ(paddle::framework::get<1>(a), 6); - - // linearization - auto b = paddle::framework::make_dim(7, 8); - EXPECT_EQ(paddle::framework::linearize(a, b), 83); - - // product - EXPECT_EQ(paddle::framework::product(a), 30); - - // mutate a Dim - paddle::framework::get<1>(b) = 10; - EXPECT_EQ(paddle::framework::get<0>(b), 7); - EXPECT_EQ(paddle::framework::get<1>(b), 10); - - // dynamic access - paddle::framework::get(b, 0) = 8; - b[1] = 11; - EXPECT_EQ(paddle::framework::get<0>(b), 8); - EXPECT_EQ(paddle::framework::get<1>(b), 11); - EXPECT_EQ(paddle::framework::get(b, 0), 8); - EXPECT_EQ(b[1], 11); - - // dynamic access on GPU - thrust::device_vector r(1); - dyn_idx_gpu<<<1,1>>>(thrust::raw_pointer_cast(r.data())); - int res = r[0]; - EXPECT_EQ(res, 6); - - // ex_prefix_mul - paddle::framework::Dim<3> c = paddle::framework::ex_prefix_mul(paddle::framework::Dim<3>(3, 4, 5)); - EXPECT_EQ(paddle::framework::get<0>(c), 1); - 
EXPECT_EQ(paddle::framework::get<1>(c), 3); - EXPECT_EQ(paddle::framework::get<2>(c), 12); - - // generate from an index - auto size = paddle::framework::make_dim(4, 5, 2); - c = paddle::framework::Dim<3>(14, size); - EXPECT_EQ(paddle::framework::get<0>(c), 2); - EXPECT_EQ(paddle::framework::get<1>(c), 3); - EXPECT_EQ(paddle::framework::get<2>(c), 0); - c = paddle::framework::Dim<3>(25, size); - EXPECT_EQ(paddle::framework::get<0>(c), 1); - EXPECT_EQ(paddle::framework::get<1>(c), 1); - EXPECT_EQ(paddle::framework::get<2>(c), 1); + // construct a Dim on the CPU + auto a = paddle::framework::make_dim(3, 4); + EXPECT_EQ(paddle::framework::get<0>(a), 3); + EXPECT_EQ(paddle::framework::get<1>(a), 4); + + // construct a Dim on the GPU + thrust::device_vector> t(2); + test<<<1, 1>>>(thrust::raw_pointer_cast(t.data())); + a = t[0]; + EXPECT_EQ(paddle::framework::get<0>(a), 5); + EXPECT_EQ(paddle::framework::get<1>(a), 6); + + // linearization + auto b = paddle::framework::make_dim(7, 8); + EXPECT_EQ(paddle::framework::linearize(a, b), 83); + + // product + EXPECT_EQ(paddle::framework::product(a), 30); + + // mutate a Dim + paddle::framework::get<1>(b) = 10; + EXPECT_EQ(paddle::framework::get<0>(b), 7); + EXPECT_EQ(paddle::framework::get<1>(b), 10); + + // dynamic access + paddle::framework::get(b, 0) = 8; + b[1] = 11; + EXPECT_EQ(paddle::framework::get<0>(b), 8); + EXPECT_EQ(paddle::framework::get<1>(b), 11); + EXPECT_EQ(paddle::framework::get(b, 0), 8); + EXPECT_EQ(b[1], 11); + + // dynamic access on GPU + thrust::device_vector r(1); + dyn_idx_gpu<<<1, 1>>>(thrust::raw_pointer_cast(r.data())); + int res = r[0]; + EXPECT_EQ(res, 6); + + // ex_prefix_mul + paddle::framework::Dim<3> c = + paddle::framework::ex_prefix_mul(paddle::framework::Dim<3>(3, 4, 5)); + EXPECT_EQ(paddle::framework::get<0>(c), 1); + EXPECT_EQ(paddle::framework::get<1>(c), 3); + EXPECT_EQ(paddle::framework::get<2>(c), 12); + + // generate from an index + auto size = paddle::framework::make_dim(4, 5, 2); + c = paddle::framework::Dim<3>(14, size); + EXPECT_EQ(paddle::framework::get<0>(c), 2); + EXPECT_EQ(paddle::framework::get<1>(c), 3); + EXPECT_EQ(paddle::framework::get<2>(c), 0); + c = paddle::framework::Dim<3>(25, size); + EXPECT_EQ(paddle::framework::get<0>(c), 1); + EXPECT_EQ(paddle::framework::get<1>(c), 1); + EXPECT_EQ(paddle::framework::get<2>(c), 1); } TEST(Dim, Bool) { - auto a = paddle::framework::make_dim(3, 4); - auto b = paddle::framework::make_dim(5, 6); - auto c = paddle::framework::make_dim(3, 4); - - // in_bounds check - EXPECT_TRUE(paddle::framework::contained(a, b)); - EXPECT_FALSE(paddle::framework::contained(b, a)); - - // comparison - EXPECT_TRUE(a == a); - EXPECT_FALSE(a == b); - EXPECT_TRUE(a == c); + auto a = paddle::framework::make_dim(3, 4); + auto b = paddle::framework::make_dim(5, 6); + auto c = paddle::framework::make_dim(3, 4); + + // in_bounds check + EXPECT_TRUE(paddle::framework::contained(a, b)); + EXPECT_FALSE(paddle::framework::contained(b, a)); + + // comparison + EXPECT_TRUE(a == a); + EXPECT_FALSE(a == b); + EXPECT_TRUE(a == c); } TEST(Dim, Print) { - { - std::stringstream ss; - auto a = paddle::framework::make_dim(2, 3); - ss << a; - EXPECT_EQ(ss.str(), "2, 3"); - } - { - std::stringstream ss; - ss << paddle::framework::make_dim(8); - EXPECT_EQ(ss.str(), "8"); - } + { + std::stringstream ss; + auto a = paddle::framework::make_dim(2, 3); + ss << a; + EXPECT_EQ(ss.str(), "2, 3"); + } + { + std::stringstream ss; + ss << paddle::framework::make_dim(8); + EXPECT_EQ(ss.str(), "8"); + } } From 
7de10ee7e2c9febbe81b22ad83e3e4584ba4a988 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Sun, 16 Jul 2017 16:35:44 +0800
Subject: [PATCH 19/50] format code
---
 paddle/framework/ddim_test.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc
index 408905b00b..6a099f2aeb 100644
--- a/paddle/framework/ddim_test.cc
+++ b/paddle/framework/ddim_test.cc
@@ -59,12 +59,12 @@ TEST(DDim, Equality) {
   // slice a DDim
   paddle::framework::DDim ddim2 =
       paddle::framework::make_ddim({1, 2, 3, 4, 5, 6});
-  paddle::framework ::DDim ss = paddle::framework::slice_ddim(ddim2, 2, 5);
+  paddle::framework::DDim ss = paddle::framework::slice_ddim(ddim2, 2, 5);
   EXPECT_EQ(arity(ss), 3);
   EXPECT_EQ(ss[0], 3);
   EXPECT_EQ(ss[1], 4);
   EXPECT_EQ(ss[2], 5);
-  paddle::framework ::DDim ss2 = paddle::framework::slice_ddim(ddim2, 0, 6);
+  paddle::framework::DDim ss2 = paddle::framework::slice_ddim(ddim2, 0, 6);
   EXPECT_EQ(arity(ss2), 6);
   EXPECT_EQ(ss2[0], 1);
   EXPECT_EQ(ss2[1], 2);

From d649dbf442bd7ba4ce63a2a4e479a27c8d40ca8d Mon Sep 17 00:00:00 2001
From: qijun
Date: Mon, 17 Jul 2017 09:40:06 +0800
Subject: [PATCH 20/50] implement add_op kernel
---
 paddle/framework/operator.cc      |  8 +++--
 paddle/framework/operator.h       | 59 +++++++++++++++----------------
 paddle/framework/tensor.h         |  6 ++--
 paddle/operators/add_op.cc        |  6 ++--
 paddle/operators/add_op.cu        |  5 ++-
 paddle/operators/add_op.h         | 13 ++++---
 paddle/platform/device_context.cc |  9 ++---
 paddle/platform/device_context.h  | 13 +++----
 8 files changed, 58 insertions(+), 61 deletions(-)

diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index 25d120c9a9..3c6376c150 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -18,13 +18,15 @@ namespace paddle {
 namespace framework {

 template <>
-DeviceType* KernelContext::get_eigen_device() {
-  return device_context_.get_eigen_device();
+Eigen::DefaultDevice* OpKernel::KernelContext::get_eigen_device<
+    platform::CPUPlace, Eigen::DefaultDevice>() const {
+  return device_context_.get_eigen_device();
 }

 #ifndef PADDLE_ONLY_CPU
 template <>
-DeviceType* KernelContext::get_eigen_device() {
+DeviceType* OpKernel::KernelContext::get_eigen_device()
+    const {
   return device_context_.get_eigen_device();
 }
 #endif
diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index 48cfeeb731..558d4a0b67 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -33,13 +33,13 @@ template
 struct EigenDeviceConverter;

 template <>
-struct EigenDeviceConverter {
+struct EigenDeviceConverter {
   using EigenDeviceType = Eigen::DefaultDevice;
 };

 #ifndef PADDLE_ONLY_CPU
 template <>
-struct EigenDeviceConverter {
+struct EigenDeviceConverter {
   using EigenDeviceType = Eigen::GpuDevice;
 };
 #endif
@@ -87,39 +87,38 @@ class OperatorBase {
   AttributeMap attrs_;
 };

-/**
- * KernelContext is the only parameter of Kernel Run function.
- * Run will get input/output variables, state such as momentum and
- * device resource such as CUDA stream, cublas handle, etc. from
- * KernelContext. User should construct it before running the Operator.
- */
-class KernelContext {
+class OpKernel {
  public:
-  KernelContext(const OperatorBase* op, const std::shared_ptr& scope,
-                const platform::DeviceContext& device_context)
-      : op_(*op), scope_(scope), device_context_(device_context) {}
-
-  const Variable* Input(int index) const {
-    return scope_->GetVariable(op_.inputs_[index]);
-  }
-
-  Variable* Output(int index) const {
-    return scope_->GetVariable(op_.outputs_[index]);
-  }
+  /**
+   * KernelContext is the only parameter of Kernel Run function.
+   * Run will get input/output variables, state such as momentum and
+   * device resource such as CUDA stream, cublas handle, etc. from
+   * KernelContext. User should construct it before running the Operator.
+   */
+  class KernelContext {
+   public:
+    KernelContext(const OperatorBase* op, const std::shared_ptr& scope,
+                  const platform::DeviceContext& device_context)
+        : op_(*op), scope_(scope), device_context_(device_context) {}
+
+    const Variable* Input(int index) const {
+      return scope_->GetVariable(op_.inputs_[index]);
+    }

-  platform::DeviceContext& device_context() const { return device_context_; }
+    Variable* Output(int index) const {
+      return scope_->GetVariable(op_.outputs_[index]);
+    }

-  template ::EigenDeviceType>
-  DeviceType* get_eigen_device();
+    template ::EigenDeviceType>
+    DeviceType* get_eigen_device() const;
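+    // For CPUPlace this returns an Eigen::DefaultDevice*, and, when built
+    // with GPU support, for GPUPlace an Eigen::GpuDevice* (see the
+    // specializations in operator.cc).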
-  const OperatorBase& op_;
-  const std::shared_ptr& scope_;
-  const platform::DeviceContext& device_context_;
-};
+    const OperatorBase& op_;
+    const std::shared_ptr& scope_;
+    const platform::DeviceContext& device_context_;
+  };

-class OpKernel {
- public:
   virtual void Compute(const KernelContext& context) const = 0;

   virtual ~OpKernel() {}
diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 01244f617c..784d52cc42 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -35,7 +35,7 @@ class Tensor {

   template
-  const T* data() const {
+  T* data() const {
     PADDLE_ENFORCE(
         holder_ != nullptr,
         "Tensor has not been initialized. 
Call Tensor::mutable_data first."); @@ -90,7 +90,7 @@ class Tensor { // flat to rank = 1 template typename TTypes::Flat flat() { - return shaped({NumElements()}); + return shaped(make_ddim({static_cast(NumElements())})); } // to TensorType Vec @@ -114,7 +114,7 @@ class Tensor { template typename TTypes::ConstFlat flat() const { - return shaped({NumElements()}); + return shaped(make_ddim({static_cast(NumElements())})); } template diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index ef39e426fd..7dc6414af2 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -40,6 +40,6 @@ The equation is: Out = X + Y } // namespace paddle REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker); -REGISTER_OP_CPU_KERNEL( - add_two, - ::paddle::operators::AddKernel<::paddle::platform::CPUPlace, float>); \ No newline at end of file +typedef paddle::operators::AddKernel<::paddle::platform::CPUPlace, float> + AddKernel_CPU_float; +REGISTER_OP_CPU_KERNEL(add_two, AddKernel_CPU_float); \ No newline at end of file diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index f4a4fb16a6..0edf142ee4 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -1,7 +1,6 @@ -#define EIGEN_USE_GPU - #include "paddle/operators/add_op.h" #include "paddle/framework/op_registry.h" +typedef paddle::operators::AddKernel<::paddle::platform::GPUPlace, float> AddKernel_GPU_float; REGISTER_OP_GPU_KERNEL(add_two, - paddle::operators::AddKernel); \ No newline at end of file + AddKernel_GPU_float); \ No newline at end of file diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index 27a477a3ac..568cb19742 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -6,19 +6,18 @@ namespace paddle { namespace operators { -// Place can be CPUPlace or GPUPlace -template +template class AddKernel : public framework::OpKernel { public: void Compute(const KernelContext& context) const override { - auto* input0 = context.Input(0); - auto* input1 = context.Input(1); + auto input0 = context.Input(0)->Get(); + auto input1 = context.Input(1)->Get(); + auto* output = context.Output(0)->GetMutable(); - auto* output = context.Output(0); - output->mutable_data(Place()); + output->mutable_data(Place()); output->flat().device(*(context.get_eigen_device())) = - input0->flat() + input1->flat(); + input0.flat() + input1.flat(); } }; diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index 960ef0a595..9c1d94e9e7 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -15,14 +15,15 @@ namespace paddle { namespace platform { template <> -Eigen::DefaultDevice* DeviceContext::get_eigen_device() { - return reinterpret_cast(this)->eigen_device(); +Eigen::DefaultDevice* DeviceContext::get_eigen_device() + const { + return reinterpret_cast(this)->eigen_device(); } #ifndef PADDLE_ONLY_CPU template <> -Eigen::GpuDevice* DeviceContext::get_eigen_device() { - return reinterpret_cast(this)->eigen_device(); +Eigen::GpuDevice* DeviceContext::get_eigen_device() const { + return reinterpret_cast(this)->eigen_device(); } #endif diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 7de07d06be..2ec7b05599 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -32,17 +32,14 @@ class DeviceContext { virtual Place GetPlace() const = 0; template - DeviceType* get_eigen_device(); + DeviceType* get_eigen_device() const; }; class 
CPUDeviceContext : public DeviceContext { public: - Eigen::DefaultDevice* eigen_device() { - if (!eigen_device_) { - eigen_device_.reset(new Eigen::DefaultDevice()); - } - return eigen_device_.get(); - } + CPUDeviceContext() { eigen_device_.reset(new Eigen::DefaultDevice()); } + + Eigen::DefaultDevice* eigen_device() const { return eigen_device_.get(); } Place GetPlace() const override { Place retv = CPUPlace(); @@ -91,7 +88,7 @@ class CUDADeviceContext : public DeviceContext { cudaStream_t stream() { return stream_; } - Eigen::GpuDevice* eigen_device() { return eigen_device_.get(); } + Eigen::GpuDevice* eigen_device() const { return eigen_device_.get(); } cublasHandle_t cublas_handle() { if (!blas_handle_) { From 65dbeb6a24a0362fb696e9f67b3effc1691d4d9e Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 17 Jul 2017 03:01:33 +0000 Subject: [PATCH 21/50] fix gpu build error --- paddle/framework/operator.cc | 6 +++--- paddle/function/RowConvOpGpu.cu | 21 +++++++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index aa859591f0..946bde5734 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -25,9 +25,9 @@ Eigen::DefaultDevice* OpKernel::KernelContext::get_eigen_device< #ifndef PADDLE_ONLY_CPU template <> -DeviceType* OpKernel::KernelContext::get_eigen_device() - const { - return device_context_.get_eigen_device(); +Eigen::GpuDevice* OpKernel::KernelContext::get_eigen_device< + platform::GPUPlace, Eigen::GpuDevice>() const { + return device_context_.get_eigen_device(); } #endif diff --git a/paddle/function/RowConvOpGpu.cu b/paddle/function/RowConvOpGpu.cu index c0b947e224..d9dcc7d59d 100644 --- a/paddle/function/RowConvOpGpu.cu +++ b/paddle/function/RowConvOpGpu.cu @@ -32,7 +32,7 @@ __global__ void KeRowConv(real* y, const real* x, const real* w, for (int i = tidy; i < context; i += blky) { sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0; } - + __syncthreads(); for (int i = 0; i < numSeq; ++i) { @@ -144,12 +144,15 @@ __global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy, int yoff = start + j; // transpose - sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; - sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? dy[yoff * width + xoff] : 0.0; + sh_x[tidx][tidy] = (xoff < width && yoff < end) ? + x[yoff * width + xoff] : 0.0; + sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? + dy[yoff * width + xoff] : 0.0; __syncthreads(); if (tidy < (context - 1)) { yoff = yoff - context + 1; - sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0; + sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? + dy[yoff * width + xoff] : 0.0; } __syncthreads(); @@ -199,11 +202,13 @@ __global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy, int yoff = start + j; // transpose - sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; + sh_x[tidx][tidy] = (xoff < width && yoff < end) ? + x[yoff * width + xoff] : 0.0; __syncthreads(); for (int t = 0; t < context; t++) { - sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0; + sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && + yoff - t < end) ? 
dy[(yoff - t) * width + xoff] : 0.0;
       __syncthreads();

       real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx];
@@ -239,7 +244,7 @@ __global__ void KeRowConvBwData(real* dx, const real* w, const real* dy,
   for (int i = tidy; i < context; i += blky) {
     sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0;
   }
-
+
   __syncthreads();

   for (int i = 0; i < numSeq; ++i) {
@@ -312,7 +317,7 @@ void RowConvGrad(const GpuMatrix& outG,
   dim3 dimBlock(32, 32);
   dim3 dimGrid(DIVUP(width, dimBlock.x), 1);
   real* dw = filterG.getData();
-  if (contextLength <= 32) {
+  if (contextLength <= 32) {
     KeRowConvBwWeight<32, 32, 32>
       <<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
       (dw, x, dy, starts, height, width, numSeq, contextLength);

From 778a1a9f8780405a32814d3f95accccb4304dc87 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Mon, 17 Jul 2017 12:03:46 +0800
Subject: [PATCH 22/50] add a comment for slice_ddim
---
 paddle/framework/ddim.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h
index 675f8680f6..df26d73d48 100644
--- a/paddle/framework/ddim.h
+++ b/paddle/framework/ddim.h
@@ -81,6 +81,13 @@ std::vector vectorize(const DDim& ddim);

 ssize_t product(const DDim& ddim);

+/**
+ * \brief Slice a ddim
+ *
+ * Slice dim with [begin, end).
+ * e.g.  DDim d = make_ddim({1,2,3,4,5});
+ *       slice_ddim(d, 1, 3); ====> {2,3}
+ */
 DDim slice_ddim(const DDim& dim, int begin, int end);

 /**

From c5bc126762031231eb8a144d3318c9dcbaea68ed Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Mon, 17 Jul 2017 12:42:04 +0800
Subject: [PATCH 23/50] Follow comment, rename to `GenerateTempVariableName`
---
 paddle/framework/op_registry.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index b627b4a60a..ec237950df 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -220,7 +220,7 @@ class OpRegistry {
     op_checkers().at(op_type).Check(op->attrs_);

     //! Convert Temporary variable name to a unique variable name.
-    AssignTempVariable(op.get());
+    GenerateTempVariableName(op.get());

     //! Other op's custom Init for a complex Op. For simple Op, the Init
     //! method does nothing.
@@ -234,7 +234,7 @@ class OpRegistry {
   };

  private:
-  static void AssignTempVariable(OperatorBase* op) {
+  static void GenerateTempVariableName(OperatorBase* op) {
     static std::atomic gUniqId(0UL);
     for (auto& outname : op->outputs_) {
       if (outname == OperatorBase::TMP_VAR_NAME()) {

From cdec5634492ed088e8c0792aafbbc43de91f6692 Mon Sep 17 00:00:00 2001
From: Yan Chunwei
Date: Mon, 17 Jul 2017 13:11:15 +0800
Subject: [PATCH 24/50] Add enforce switch for convenient development (#2850)

* add NDEBUG switch to PADDLE_ENFORCE
---
 paddle/framework/CMakeLists.txt | 10 ++++++----
 paddle/framework/enforce.cc     | 15 +++++++++++++++
 paddle/framework/enforce.h      |  6 ++++++
 3 files changed, 27 insertions(+), 4 deletions(-)
 create mode 100644 paddle/framework/enforce.cc

diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index cc5b05ff0d..824d34d016 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -2,21 +2,23 @@ cc_library(ddim SRCS ddim.cc)
 cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
 nv_test(dim_test SRCS dim_test.cu DEPS ddim)
-cc_test(tensor_test SRCS tensor_test.cc DEPS ddim)
+cc_test(tensor_test SRCS tensor_test.cc DEPS ddim glog gflags)
 cc_test(variable_test SRCS variable_test.cc)
 cc_test(scope_test SRCS scope_test.cc)
-cc_test(enforce_test SRCS enforce_test.cc)
+cc_library(enforce SRCS enforce.cc DEPS glog gflags)
+cc_test(enforce_test SRCS enforce_test.cc DEPS enforce)
 proto_library(attr_type SRCS attr_type.proto)
 proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
 cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
 proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
 cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
-cc_library(operator SRCS operator.cc DEPS op_desc device_context)
+cc_library(operator SRCS operator.cc DEPS op_desc device_context enforce)
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
-cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc)
+cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc enforce)
 cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry operator)
+
 py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)
 # Generate an empty __init__.py to make framework_py_proto as a valid python module.
 add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
diff --git a/paddle/framework/enforce.cc b/paddle/framework/enforce.cc
new file mode 100644
index 0000000000..644930ff98
--- /dev/null
+++ b/paddle/framework/enforce.cc
@@ -0,0 +1,15 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. 
*/ + +#include "paddle/framework/enforce.h" diff --git a/paddle/framework/enforce.h b/paddle/framework/enforce.h index 56cb7f9564..ffce8148e9 100644 --- a/paddle/framework/enforce.h +++ b/paddle/framework/enforce.h @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include #include #include @@ -58,12 +59,17 @@ class EnforceNotMet : public std::exception { /** * @brief Enforce a condition, otherwise throw an EnforceNotMet */ +#ifdef NDEBUG #define PADDLE_ENFORCE(condition, ...) \ do { \ if (UNLIKELY(!(condition))) { \ PADDLE_THROW(__VA_ARGS__); \ } \ } while (0) +#else +#define PADDLE_ENFORCE(condition, ...) \ + CHECK(condition) << ::paddle::string::Sprintf(__VA_ARGS__); +#endif } // namespace framework } // namespace paddle From c78a5e5da24e7e7edc7d5cfd92b349f3913773ac Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 17 Jul 2017 13:11:47 +0800 Subject: [PATCH 25/50] Fix merge error before --- python/paddle/v2/optimizer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index 260a509469..ba58198033 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -1,4 +1,3 @@ -import py_paddle.swig_paddle as swig_api import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils import paddle.trainer_config_helpers.optimizers as v1_optimizers """ @@ -17,6 +16,7 @@ __all__ = [ class Optimizer(object): def __init__(self, **kwargs): + import py_paddle.swig_paddle as swig_api if 'batch_size' in kwargs: del kwargs['batch_size'] # not important for python library. @@ -25,8 +25,6 @@ class Optimizer(object): self.__opt_conf_proto__ = config_parser_utils.parse_optimizer_config( __impl__) - if swig_api is None: - raise RuntimeError("paddle.v2 currently need swig_paddle") self.__opt_conf__ = swig_api.OptimizationConfig.createFromProto( self.__opt_conf_proto__) @@ -37,18 +35,22 @@ class Optimizer(object): For each optimizer(SGD, Adam), GradientMachine should enable different buffers. """ + import py_paddle.swig_paddle as swig_api tmp = swig_api.ParameterOptimizer.create(self.__opt_conf__) assert isinstance(tmp, swig_api.ParameterOptimizer) return tmp.getParameterTypes() def __create_local_updater__(self): + import py_paddle.swig_paddle as swig_api return swig_api.ParameterUpdater.createLocalUpdater(self.__opt_conf__) def __create_remote_updater__(self, pass_num, use_sparse_updater): + import py_paddle.swig_paddle as swig_api return swig_api.ParameterUpdater.createRemoteUpdater( self.__opt_conf__, pass_num, use_sparse_updater) def __create_new_remote_updater__(self, pserver_spec, use_etcd): + import py_paddle.swig_paddle as swig_api return swig_api.ParameterUpdater.createNewRemoteUpdater( self.__opt_conf__, pserver_spec, use_etcd) From 8a3e7353078b01d2d1ba133b6eb1e24ea0d20314 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 17 Jul 2017 05:57:03 +0000 Subject: [PATCH 26/50] Delete the blank line at the end of script file build_android.sh. --- paddle/scripts/docker/build_android.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index 53e1b818cb..56d290be4a 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -21,4 +21,3 @@ cmake -DCMAKE_SYSTEM_NAME=Android \ .. 
make -j `nproc`
 make install
-

From 80a26a63083cf002567cd2363d9d722ae94d17d2 Mon Sep 17 00:00:00 2001
From: Qiao Longfei
Date: Mon, 17 Jul 2017 14:16:47 +0800
Subject: [PATCH 27/50] check for duplicates in ProtoAndCheckerMaker (#2903)
---
 paddle/framework/op_registry.h       | 31 +++++++++++++++++-------
 paddle/framework/op_registry_test.cc | 36 ++++++++++++++++++++++++++--
 2 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index 24f56b2812..41bdb65f8e 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -61,7 +61,14 @@ class OpProtoAndCheckerMaker {
   OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
       : proto_(proto), op_checker_(op_checker) {}

-  ~OpProtoAndCheckerMaker() { CheckNoDuplicatedAttrs(); }
+  ~OpProtoAndCheckerMaker() {
+    PADDLE_ENFORCE(validated_, "should call Validate after build");
+  }
+
+  void Validate() {
+    validated_ = true;
+    CheckNoDuplicatedInOutAttrs();
+  }

  protected:
   void AddInput(const std::string& name, const std::string& comment,
@@ -163,19 +170,26 @@ Add a mark to which output is temporary is helpful for future optimization.
     }
   }

-  void CheckNoDuplicatedAttrs() {
+  void CheckNoDuplicatedInOutAttrs() {
     std::unordered_set<std::string> names;
-    size_t cnt = 0;
+    auto checker = [&](const std::string& name) {
+      PADDLE_ENFORCE(!names.count(name), "[%s] is duplicated", name);
+      names.insert(name);
+    };
     for (auto& attr : proto_->attrs()) {
-      names.insert(attr.name());
-      ++cnt;
+      checker(attr.name());
+    }
+    for (auto& input : proto_->inputs()) {
+      checker(input.name());
+    }
+    for (auto& output : proto_->outputs()) {
+      checker(output.name());
     }
-    PADDLE_ENFORCE(names.size() == cnt,
-                   "Cannot register two attribute in same name!");
   }

   OpProto* proto_;
   OpAttrChecker* op_checker_;
+  bool validated_{false};
   bool has_multiple_input_{false};
   bool has_multiple_output_{false};
   bool has_temporary_output_{false};
@@ -190,7 +204,8 @@ class OpRegistry {
     creators()[op_type] = [] { return new OpType; };
     OpProto& op_proto = protos()[op_type];
     OpAttrChecker& op_checker = op_checkers()[op_type];
-    ProtoMakerType(&op_proto, &op_checker);
+    auto maker = ProtoMakerType(&op_proto, &op_checker);
+    maker.Validate();
     *op_proto.mutable_type() = op_type;
     PADDLE_ENFORCE(
         op_proto.IsInitialized(),
diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc
index 4791d4aaab..d3a51a361a 100644
--- a/paddle/framework/op_registry_test.cc
+++ b/paddle/framework/op_registry_test.cc
@@ -1,6 +1,8 @@
 #include "paddle/framework/op_registry.h"
 #include

+namespace pd = paddle::framework;
+
 namespace paddle {
 namespace framework {
 class CosineOp : public OperatorBase {
@@ -28,8 +30,6 @@ class MyTestOp : public OperatorBase {
   void InferShape(const ScopePtr& scope) const override {}
   void Run(const ScopePtr& scope,
            const platform::DeviceContext& dev_ctx) const override {}
-
- public:
 };

 class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
@@ -182,3 +182,35 @@ TEST(OpRegistry, CustomChecker) {
   int test_attr = op->GetAttr("test_attr");
   ASSERT_EQ(test_attr, 4);
 }
+
+class TestAttrProtoMaker : public pd::OpProtoAndCheckerMaker {
+ public:
+  TestAttrProtoMaker(pd::OpProto* proto, pd::OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddAttr<float>("scale", "scale of test op");
+    AddAttr<float>("scale", "scale of test op");
+  }
+};
+
+TEST(ProtoMaker, DuplicatedAttr) {
+  pd::OpProto op_proto;
+  pd::OpAttrChecker op_checker;
+  auto proto_maker =
TestAttrProtoMaker(&op_proto, &op_checker); + ASSERT_THROW(proto_maker.Validate(), paddle::framework::EnforceNotMet); +} + +class TestInOutProtoMaker : public pd::OpProtoAndCheckerMaker { + public: + TestInOutProtoMaker(pd::OpProto* proto, pd::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("input", "input of test op"); + AddInput("input", "input of test op"); + } +}; + +TEST(ProtoMaker, DuplicatedInOut) { + pd::OpProto op_proto; + pd::OpAttrChecker op_checker; + auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker); + ASSERT_THROW(proto_maker.Validate(), paddle::framework::EnforceNotMet); +} From 38310f9349fedfeaac054eb6283f6c1a54ff5327 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 17 Jul 2017 14:30:35 +0800 Subject: [PATCH 28/50] Refine CMake dependencies graph --- paddle/framework/CMakeLists.txt | 10 +++++----- paddle/framework/tensor.cc | 19 +++++++++++++++++++ paddle/operators/CMakeLists.txt | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 paddle/framework/tensor.cc diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 824d34d016..e7d1c7203a 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -1,19 +1,19 @@ -# ddim lib +cc_library(enforce SRCS enforce.cc DEPS glog) +cc_test(enforce_test SRCS enforce_test.cc DEPS enforce) cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) -cc_test(tensor_test SRCS tensor_test.cc DEPS ddim glog gflags) +cc_library(tensor SRCS tensor.cc DEPS ddim place enforce paddle_memory) +cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) -cc_library(enforce SRCS enforce.cc DEPS glog gflags) -cc_test(enforce_test SRCS enforce_test.cc DEPS enforce) proto_library(attr_type SRCS attr_type.proto) proto_library(op_proto SRCS op_proto.proto DEPS attr_type) cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf) proto_library(op_desc SRCS op_desc.proto DEPS attr_type) cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf) -cc_library(operator SRCS operator.cc DEPS op_desc device_context enforce) +cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc enforce) diff --git a/paddle/framework/tensor.cc b/paddle/framework/tensor.cc new file mode 100644 index 0000000000..964f15ab66 --- /dev/null +++ b/paddle/framework/tensor.cc @@ -0,0 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/
+
+#include
+
+namespace paddle {
+namespace framework {}
+}  // namespace paddle
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index b2ea8eb344..441b9e30c4 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -4,7 +4,7 @@ function(op_library TARGET)
   # for ops.
   set(cc_srcs)
   set(cu_srcs)
-  set(op_common_deps operator op_registry glog ddim)
+  set(op_common_deps operator op_registry)
   set(options "")
   set(oneValueArgs "")
   set(multiValueArgs SRCS DEPS)

From 5017b154689bd8cb595c1d37a54cb2fd072488bc Mon Sep 17 00:00:00 2001
From: qijun
Date: Mon, 17 Jul 2017 15:37:42 +0800
Subject: [PATCH 29/50] refactor tensor mutable_data
---
 paddle/framework/operator.h      | 14 +++++++-------
 paddle/framework/tensor.h        | 22 ++++++++++------------
 paddle/platform/device_context.h |  4 ++--
 3 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index c48d990eb2..e6cae9c32b 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -14,17 +14,17 @@ limitations under the License. */

 #pragma once

-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
 #include
 #include
 #include
+#include "paddle/framework/attr_checker.h"
+#include "paddle/framework/op_desc.pb.h"
+#include "paddle/framework/scope.h"
+#include "paddle/framework/tensor.h"
+#include "paddle/platform/device_context.h"
+#include "paddle/platform/place.h"
+#include "paddle/utils/Error.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 30e00d0e0f..7ba4b29e7c 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -62,21 +62,19 @@ class Tensor {
         !(holder_->place() ==
           place) /* some versions of boost::variant don't have operator!= */
         || holder_->size() < numel_ * sizeof(T) + offset_) {
+      if (platform::is_cpu_place(place)) {
+        holder_.reset(new PlaceholderImpl(
+            boost::get(place), numel_ * sizeof(T)));
+      }
 #ifdef __CUDACC__
-      switch (place.which()) {
-        case 0:
-          holder_.reset(new PlaceholderImpl(
-              boost::get(place), numel_ * sizeof(T)));
-          break;
-
-        case 1:
-          holder_.reset(new PlaceholderImpl(
-              boost::get(place), numel_ * sizeof(T)));
-          break;
+      else if (platform::is_gpu_place(place)) {
+        holder_.reset(new PlaceholderImpl(
+            boost::get(place), numel_ * sizeof(T)));
       }
 #else
-      holder_.reset(new PlaceholderImpl(
-          boost::get(place), numel_ * sizeof(T)));
+      else if (platform::is_gpu_place(place)) {
+        PADDLE_ENFORCE(false, "GPU is not supported!");
+      }
 #endif
       offset_ = 0;
     }
diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h
index 5f8ad15951..f226a75c20 100644
--- a/paddle/platform/device_context.h
+++ b/paddle/platform/device_context.h
@@ -20,9 +20,9 @@ limitations under the License.
*/
 #include "paddle/platform/gpu_info.h"
 #define EIGEN_USE_GPU
 #endif
-#include
 #include
-#include
+#include "paddle/platform/place.h"
+#include "unsupported/Eigen/CXX11/Tensor"

 namespace paddle {
 namespace platform {

From 1ed237c11ea1f9f0f68467dcf0f284bc54f9129c Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Mon, 17 Jul 2017 15:55:06 +0800
Subject: [PATCH 30/50] Add skeletons of `mul`, `rowwise_add`, `sigmoid`,
 `softmax` ops

* Implement InferShape and register them; each gets a stub Kernel method
  that only logs via LOG(INFO) for now.
---
 paddle/framework/ddim.cc           |  6 +++
 paddle/framework/ddim.h            |  4 ++
 paddle/operators/CMakeLists.txt    |  5 +++
 paddle/operators/add_op.cc         |  3 +-
 paddle/operators/mul_op.cc         | 60 +++++++++++++++++++++++++++++
 paddle/operators/mul_op.cu         | 20 ++++++++++
 paddle/operators/mul_op.h          | 30 +++++++++++++++
 paddle/operators/rowwise_add_op.cc | 61 ++++++++++++++++++++++++++++++
 paddle/operators/rowwise_add_op.cu |  5 +++
 paddle/operators/rowwise_add_op.h  | 31 +++++++++++++++
 paddle/operators/sigmoid_op.cc     | 49 ++++++++++++++++++++++++
 paddle/operators/sigmoid_op.cu     |  5 +++
 paddle/operators/sigmoid_op.h      | 31 +++++++++++++++
 paddle/operators/softmax_op.cc     | 49 ++++++++++++++++++++++++
 paddle/operators/softmax_op.cu     |  5 +++
 paddle/operators/softmax_op.h      | 31 +++++++++++++++
 paddle/pybind/CMakeLists.txt       |  3 +-
 paddle/pybind/pybind.cc            |  4 ++
 18 files changed, 399 insertions(+), 3 deletions(-)
 create mode 100644 paddle/operators/mul_op.cc
 create mode 100644 paddle/operators/mul_op.cu
 create mode 100644 paddle/operators/mul_op.h
 create mode 100644 paddle/operators/rowwise_add_op.cc
 create mode 100644 paddle/operators/rowwise_add_op.cu
 create mode 100644 paddle/operators/rowwise_add_op.h
 create mode 100644 paddle/operators/sigmoid_op.cc
 create mode 100644 paddle/operators/sigmoid_op.cu
 create mode 100644 paddle/operators/sigmoid_op.h
 create mode 100644 paddle/operators/softmax_op.cc
 create mode 100644 paddle/operators/softmax_op.cu
 create mode 100644 paddle/operators/softmax_op.h

diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc
index 73f5499ad1..f3dd396613 100644
--- a/paddle/framework/ddim.cc
+++ b/paddle/framework/ddim.cc
@@ -278,5 +278,11 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
   return os;
 }

+ssize_t DDim::size() const { return product(*this); }
+
+DDim::DDim(std::initializer_list<int> init_list) {
+  *this = make_ddim(init_list);
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h
index a0c2a8a74a..3976c6c029 100644
--- a/paddle/framework/ddim.h
+++ b/paddle/framework/ddim.h
@@ -29,6 +29,8 @@ struct DDim {
   template
   explicit DDim(const Dim& in) : var(in) {}

+  /*implicit*/ DDim(std::initializer_list<int> init_list);
+
   template
   DDim& operator=(const Dim& in) {
     var = in;
@@ -57,6 +59,8 @@ struct DDim {

   DDim operator+(DDim d) const;

   DDim operator*(DDim d) const;
+
+  ssize_t size() const;
 };

 /**
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index 441b9e30c4..f47c3a4208 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -42,3 +42,8 @@ endfunction()

 op_library(add_op SRCS add_op.cc add_op.cu)
 cc_test(add_op_test SRCS add_op_test.cc DEPS add_op)
+
+op_library(mul_op SRCS mul_op.cc mul_op.cu)
+op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
+op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc)
+op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc
index 522b23cbc4..355c92a504 100644
--- a/paddle/operators/add_op.cc
+++ b/paddle/operators/add_op.cc
@@ -31,8 +31,7 @@ protected:
                    "Inputs/Outputs of AddOp must all be set");
     PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(),
                    "The dimensions of the two inputs of Add Op must be the same.");
-    // Need set dims in Tensor
-    // outputs[0]->set_dims(inputs[0]->dims())
+    outputs[0]->set_dims(inputs[0]->dims());
   }
 };
diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc
new file mode 100644
index 0000000000..713b2a5dc8
--- /dev/null
+++ b/paddle/operators/mul_op.cc
@@ -0,0 +1,60 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include
+#include
+#include
+
+namespace paddle {
+namespace operators {
+
+class MulOp : public framework::OperatorWithKernel {
+protected:
+  void InferShape(
+      const std::vector &inputs,
+      const std::vector &outputs) const override {
+    PADDLE_ENFORCE(inputs.size() == 2, "The mul op must take two inputs");
+    auto dim0 = inputs[0]->dims();
+    auto dim1 = inputs[1]->dims();
+    PADDLE_ENFORCE(dim0.size() == 2 && dim1.size() == 2,
+                   "The inputs of mul op must be matrices");
+    PADDLE_ENFORCE(
+        dim0[1] == dim1[0],
+        "First matrix's width must be equal to second matrix's height.");
+    PADDLE_ENFORCE(outputs.size() == 1, "The mul op must take one output");
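+    // A braced shape list works below because DDim gained an implicit
+    // std::initializer_list constructor in ddim.h earlier in this patch.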
522b23cbc4..355c92a504 100644
--- a/paddle/operators/add_op.cc
+++ b/paddle/operators/add_op.cc
@@ -31,8 +31,7 @@ protected:
                    "Inputs/Outputs of AddOp must all be set");
     PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(),
                    "The two inputs of the Add Op must have the same dimensions.");
-    // Need set dims in Tensor
-    // outputs[0]->set_dims(inputs[0]->dims())
+    outputs[0]->set_dims(inputs[0]->dims());
   }
 };
diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc
new file mode 100644
index 0000000000..713b2a5dc8
--- /dev/null
+++ b/paddle/operators/mul_op.cc
@@ -0,0 +1,60 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include
+#include
+#include
+
+namespace paddle {
+namespace operators {
+
+class MulOp : public framework::OperatorWithKernel {
+protected:
+  void InferShape(
+      const std::vector<const framework::Tensor *> &inputs,
+      const std::vector<framework::Tensor *> &outputs) const override {
+    PADDLE_ENFORCE(inputs.size() == 2, "The mul op must take two inputs");
+    auto dim0 = inputs[0]->dims();
+    auto dim1 = inputs[1]->dims();
+    PADDLE_ENFORCE(dim0.size() == 2 && dim1.size() == 2,
+                   "The inputs of the mul op must be matrices");
+    PADDLE_ENFORCE(
+        dim0[1] == dim1[0],
+        "First matrix's width must be equal to second matrix's height.");
+    PADDLE_ENFORCE(outputs.size() == 1, "The mul op must take one output");
+    outputs[0]->set_dims({dim0[0], dim1[1]});
+  }
+};
+
+class MulOpMaker : public framework::OpProtoAndCheckerMaker {
+public:
+  MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "The first input of mul op");
+    AddInput("Y", "The second input of mul op");
+    AddOutput("Out", "The output of mul op");
+    AddComment(R"DOC(
+Two Element Mul Operator.
+
+The equation is: Out = X * Y
+)DOC");
+  }
+};
+
+} // namespace operators
+} // namespace paddle
+
+REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker);
+REGISTER_OP_CPU_KERNEL(
+    mul, paddle::operators::MulKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu
new file mode 100644
index 0000000000..201723df24
--- /dev/null
+++ b/paddle/operators/mul_op.cu
@@ -0,0 +1,20 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/ + +#include +#include + +REGISTER_OP_GPU_KERNEL(mul, + paddle::operators::MulKernel); \ No newline at end of file diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h new file mode 100644 index 0000000000..ed8d26e136 --- /dev/null +++ b/paddle/operators/mul_op.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include + +namespace paddle { +namespace operators { + +template +class MulKernel : public framework::OpKernel { +public: + void Compute(const KernelContext &context) const override { + LOG(INFO) << "Mul kernel in " << typeid(Place).name(); + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc new file mode 100644 index 0000000000..414bafd046 --- /dev/null +++ b/paddle/operators/rowwise_add_op.cc @@ -0,0 +1,61 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/
+
+#include
+#include
+namespace paddle {
+namespace operators {
+
+class RowWiseAddOp : public framework::OperatorWithKernel {
+protected:
+  void InferShape(
+      const std::vector<const framework::Tensor *> &inputs,
+      const std::vector<framework::Tensor *> &outputs) const override {
+    PADDLE_ENFORCE(inputs.size() == 2UL, "Two inputs are needed by rowwise add");
+    auto dim0 = inputs[0]->dims();
+    auto dim1 = inputs[1]->dims();
+
+    PADDLE_ENFORCE(dim0.size() == 2, "The first input must be a matrix");
+    PADDLE_ENFORCE(dim1.size() == 1, "The second input must be a vector");
+    PADDLE_ENFORCE(dim0[1] == dim1[0],
+                   "The widths of the two inputs must be the same");
+    PADDLE_ENFORCE(outputs.size() == 1, "The output size must be 1");
+    outputs[0]->set_dims(inputs[0]->dims());
+  }
+};
+
+class RowWiseAddOpMaker : public framework::OpProtoAndCheckerMaker {
+public:
+  RowWiseAddOpMaker(framework::OpProto *proto,
+                    framework::OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "The left input of row-wise add op, must be a matrix");
+    AddInput("b", "The right input of row-wise add op, must be a vector");
+    AddOutput("Out", "The output of row-wise add op");
+    AddComment(R"DOC(Row-wise Add operator
+
+for i in xrange(X.shape[0]):
+  Out[i] = X[i] + b
+)DOC");
+  }
+};
+
+} // namespace operators
+} // namespace paddle
+
+REGISTER_OP(rowwise_add,
+            paddle::operators::RowWiseAddOp,
+            paddle::operators::RowWiseAddOpMaker);
+REGISTER_OP_CPU_KERNEL(
+    rowwise_add,
+    paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu
new file mode 100644
index 0000000000..95e29d1fa3
--- /dev/null
+++ b/paddle/operators/rowwise_add_op.cu
@@ -0,0 +1,5 @@
+#include
+#include
+
+REGISTER_OP_GPU_KERNEL(
+    mul, paddle::operators::RowWiseAddKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h
new file mode 100644
index 0000000000..3dfde93ba2
--- /dev/null
+++ b/paddle/operators/rowwise_add_op.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+#include
+#include
+
+namespace paddle {
+namespace operators {
+
+template <typename Place>
+class RowWiseAddKernel : public framework::OpKernel {
+public:
+  void Compute(const KernelContext &context) const override {
+    LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name();
+  }
+};
+
+} // namespace operators
+} // namespace paddle
diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc
new file mode 100644
index 0000000000..45ae277c53
--- /dev/null
+++ b/paddle/operators/sigmoid_op.cc
@@ -0,0 +1,49 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include
+#include
+namespace paddle {
+namespace operators {
+
+class SigmoidOp : public framework::OperatorWithKernel {
+protected:
+  void InferShape(
+      const std::vector<const framework::Tensor *> &inputs,
+      const std::vector<framework::Tensor *> &outputs) const override {
+    PADDLE_ENFORCE(inputs.size() == 1, "Sigmoid Op only has one input");
+    PADDLE_ENFORCE(outputs.size() == 1, "Sigmoid Op only has one output");
+    outputs[0]->set_dims(inputs[0]->dims());
+  }
+};
+
+class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
+public:
+  SigmoidOpMaker(framework::OpProto *proto,
+                 framework::OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "sigmoid input");
+    AddOutput("Y", "sigmoid output");
+    AddComment("Sigmoid function");
+  }
+};
+
+} // namespace operators
+} // namespace paddle
+
+REGISTER_OP(sigmoid,
+            paddle::operators::SigmoidOp,
+            paddle::operators::SigmoidOpMaker);
+REGISTER_OP_CPU_KERNEL(
+    sigmoid,
+    paddle::operators::SigmoidKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu
new file mode 100644
index 0000000000..79d5222348
--- /dev/null
+++ b/paddle/operators/sigmoid_op.cu
@@ -0,0 +1,5 @@
+#include
+#include
+
+REGISTER_OP_GPU_KERNEL(
+    sigmoid,
+    paddle::operators::SigmoidKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h
new file mode 100644
index 0000000000..191aa42e4a
--- /dev/null
+++ b/paddle/operators/sigmoid_op.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include
+#include
+
+namespace paddle {
+namespace operators {
+
+template <typename Place>
+class SigmoidKernel : public framework::OpKernel {
+public:
+  void Compute(const KernelContext &context) const override {
+    LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name();
+  }
+};
+} // namespace operators
+} // namespace paddle
diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc
new file mode 100644
index 0000000000..4ca7be359e
--- /dev/null
+++ b/paddle/operators/softmax_op.cc
@@ -0,0 +1,49 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#include
+#include
+
+namespace paddle {
+namespace operators {
+
+class SoftmaxOp : public framework::OperatorWithKernel {
+protected:
+  void InferShape(
+      const std::vector<const framework::Tensor *> &inputs,
+      const std::vector<framework::Tensor *> &outputs) const override {
+    PADDLE_ENFORCE(inputs.size() == 1, "Only one input is needed for softmax");
+    PADDLE_ENFORCE(outputs.size() == 1, "Only one output is needed for softmax");
+
+    outputs[0]->set_dims(inputs[0]->dims());
+  }
+};
+
+class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
+public:
+  SoftmaxOpMaker(framework::OpProto *proto,
+                 framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "input of softmax");
+    AddOutput("Y", "output of softmax");
+    AddComment("Softmax Op");
+  }
+};
+
+} // namespace operators
+} // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
+REGISTER_OP_CPU_KERNEL(softmax,
+                       ops::SoftmaxKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu
new file mode 100644
index 0000000000..59f32b35cf
--- /dev/null
+++ b/paddle/operators/softmax_op.cu
@@ -0,0 +1,5 @@
+#include
+#include
+
+REGISTER_OP_GPU_KERNEL(
+    softmax,
+    paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h
new file mode 100644
index 0000000000..fe97c9aafe
--- /dev/null
+++ b/paddle/operators/softmax_op.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/ + +#pragma once + +#include +#include + +namespace paddle { +namespace operators { + +template +class SoftmaxKernel : public framework::OpKernel { +public: + void Compute(const KernelContext &context) const override { + LOG(INFO) << "Softmax kernel in " << typeid(Place).name(); + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index 8564a5f5fe..00b14a9432 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1 +1,2 @@ -cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python add_op) +cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python + add_op mul_op rowwise_add_op sigmoid_op softmax_op) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index c1a025ed04..aa2b84799c 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -24,6 +24,10 @@ namespace py = pybind11; namespace pd = paddle::framework; USE_OP(add_two); +USE_OP(softmax); +USE_OP(mul); +USE_OP(rowwise_add); +USE_OP(sigmoid); PYBIND11_PLUGIN(core) { py::module m("core", "C++ core of Paddle Paddle"); From a0caf23430545c12b4f714891d5437559a67ac07 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Mon, 17 Jul 2017 16:03:12 +0800 Subject: [PATCH 31/50] Op varient inputs (#2901) * add inputs * add ut for multiple inputs * fix AddToLayer * op_desc -> op_proto * CreateArgumentOffsetMap -> CreateInOutOffsetMap * move CreateInOutOffsetMap from OperatorBase to op registry * arg_idxs_ -> in_out_idxs_ --- paddle/framework/op_registry.h | 11 +++ paddle/framework/operator.cc | 58 +++++++++++++++ paddle/framework/operator.h | 99 ++++++++++++++++++------- paddle/framework/operator_test.cc | 116 +++++++++++++++++++++++++++--- paddle/operators/add_op.h | 4 +- 5 files changed, 251 insertions(+), 37 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 41bdb65f8e..a84364301a 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -216,21 +216,32 @@ class OpRegistry { static OperatorPtr CreateOp(const OpDesc& op_desc) { std::string op_type = op_desc.type(); OperatorPtr op(creators().at(op_type)()); + const OpProto& op_proto = protos().at(op_type); + // set op's inputs_ from desc. op->type_ = op_desc.type(); op->inputs_.reserve((size_t)op_desc.inputs_size()); std::copy(op_desc.inputs().begin(), op_desc.inputs().end(), std::back_inserter(op->inputs_)); + // set op's outputs_ from desc. op->outputs_.reserve((size_t)op_desc.outputs_size()); std::copy(op_desc.outputs().begin(), op_desc.outputs().end(), std::back_inserter(op->outputs_)); + // set op's attr; for (auto& attr : op_desc.attrs()) { op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr); } op_checkers().at(op_type).Check(op->attrs_); + // set argument offsets stored in op. + CreateInOutOffsetMap(op, op_proto); op->Init(); return op; } + // init op.in_out_idxs_ to accelerate argument's offset lookup. + static void CreateInOutOffsetMap(OperatorPtr op, const OpProto& proto) { + op->CreateInOutOffsetMap(proto); + } + static std::unordered_map& protos() { static std::unordered_map protos_; return protos_; diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 7756162a87..58a34fca0f 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -12,11 +12,69 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/
+#include
+
 #include "paddle/framework/operator.h"

 namespace paddle {
 namespace framework {

+void OperatorBase::CreateInOutOffsetMap(const OpProto& proto) {
+  PADDLE_ENFORCE(in_out_idxs_.empty(), "duplicate call to CreateInOutOffsetMap");
+  for (int i = 0; i < proto.inputs_size(); i++) {
+    const auto& name = proto.inputs()[i].name();
+    in_out_idxs_[name] = i;
+  }
+  for (int i = 0; i < proto.outputs_size(); i++) {
+    const auto& name = proto.outputs()[i].name();
+    in_out_idxs_[name] = i;
+  }
+}
+
+const std::string& OperatorBase::Input(const std::string& name) const {
+  auto it = in_out_idxs_.find(name);
+  PADDLE_ENFORCE(it != in_out_idxs_.end(), "no key [%s] in in_out_idxs_", name);
+
+  if (attrs_.count("input_format") == 0) {
+    return inputs_[it->second];
+  } else {
+    const auto& input_format = GetAttr<std::vector<int>>("input_format");
+    int idx = input_format[it->second];
+    return inputs_.at(idx);
+  }
+}
+
+std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
+  auto input_format = GetAttr<std::vector<int>>("input_format");
+  auto offset = in_out_idxs_.at(name);
+
+  return std::vector<std::string>{
+      inputs_.begin() + input_format.at(offset),
+      inputs_.begin() + input_format.at(offset + 1)};
+}
+
+const std::string& OperatorBase::Output(const std::string& name) const {
+  auto it = in_out_idxs_.find(name);
+  PADDLE_ENFORCE(it != in_out_idxs_.end(), "no key [%s] in in_out_idxs_", name);
+
+  if (attrs_.count("output_format") == 0) {
+    return outputs_[it->second];
+  } else {
+    const auto& output_format = GetAttr<std::vector<int>>("output_format");
+    int idx = output_format[it->second];
+    return outputs_.at(idx);
+  }
+}
+
+std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
+  auto output_format = GetAttr<std::vector<int>>("output_format");
+  auto offset = in_out_idxs_.at(name);
+
+  return std::vector<std::string>{
+      outputs_.begin() + output_format.at(offset),
+      outputs_.begin() + output_format.at(offset + 1)};
+}
+
 std::string OperatorBase::DebugString() const {
   std::stringstream ss;
   ss << "=================\n";
diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index f7ed6e9f3d..6567950ce5 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -14,18 +14,20 @@ limitations under the License. */

 #pragma once

-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
 #include
 #include
 #include
+#include "paddle/framework/attr_checker.h"
+#include "paddle/framework/op_desc.pb.h"
+#include "paddle/framework/op_proto.pb.h"
+#include "paddle/framework/scope.h"
+#include "paddle/framework/tensor.h"
+#include "paddle/platform/device_context.h"
+#include "paddle/platform/place.h"
+#include "paddle/utils/Error.h"
+
 namespace paddle {
 namespace framework {

@@ -62,11 +64,72 @@ class OperatorBase {
   virtual void Run(const ScopePtr& scope,
                    const platform::DeviceContext& dev_ctx) const = 0;

+  // Get an input with argument's name described in `op_proto`
+  const std::string& Input(const std::string& name) const;
+  // Get an input which has multiple variables.
+  // TODO add a vector_view to prevent memory copy.
+  std::vector<std::string> Inputs(const std::string& name) const;
+  // Get an output with argument's name described in `op_proto`
+  const std::string& Output(const std::string& name) const;
+  // Get an output which has multiple variables.
+  // TODO add a vector_view to prevent memory copy.
+  std::vector<std::string> Outputs(const std::string& name) const;
+
+  // init in_out_idxs_ to accelerate argument's offset lookup.
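+  // A descriptive note: OpRegistry::CreateOp calls this once, right after
+  // the operator is constructed, using the operator's registered OpProto.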
+ void CreateInOutOffsetMap(const OpProto& proto); + public: std::string type_; std::vector inputs_; std::vector outputs_; AttributeMap attrs_; + // store the arguments' offset described in op_desc. + std::unordered_map in_out_idxs_; +}; + +class KernelContext { + public: + KernelContext(const OperatorBase* op, const std::shared_ptr& scope, + const platform::DeviceContext& device_context) + : op_(*op), scope_(scope), device_context_(device_context) {} + + const Variable* Input(int index) const { + return scope_->GetVariable(op_.inputs_[index]); + } + + Variable* Output(int index) const { + return scope_->GetVariable(op_.outputs_[index]); + } + + const Variable* Input(const std::string& name) const { + return scope_->GetVariable(op_.Input(name)); + } + + const Variable* Output(const std::string& name) const { + return scope_->GetVariable(op_.Output(name)); + } + + const std::vector Inputs(const std::string& name) const { + auto names = op_.Inputs(name); + std::vector res; + std::transform( + names.begin(), names.end(), res.begin(), + [this](const std::string& name) { return scope_->GetVariable(name); }); + return res; + } + + const std::vector Outputs(const std::string& name) const { + auto names = op_.Outputs(name); + std::vector res; + std::transform( + names.begin(), names.end(), res.begin(), + [this](const std::string& name) { return scope_->GetVariable(name); }); + return res; + } + + const OperatorBase& op_; + const std::shared_ptr& scope_; + const platform::DeviceContext& device_context_; }; class OpKernel { @@ -77,25 +140,6 @@ class OpKernel { * device resource such as CUDA stream, cublas handle, etc. from * KernelContext. User should construct it before run the Operator. */ - class KernelContext { - public: - KernelContext(const OperatorBase* op, const ScopePtr& scope, - const platform::DeviceContext& device_context) - : op_(*op), scope_(scope), device_context_(device_context) {} - - const Variable* Input(int index) const { - return scope_->GetVariable(op_.inputs_[index]); - } - - Variable* Output(int index) const { - return scope_->GetVariable(op_.outputs_[index]); - } - - const OperatorBase& op_; - const ScopePtr& scope_; - const platform::DeviceContext& device_context_; - }; - virtual void Compute(const KernelContext& context) const = 0; virtual ~OpKernel() {} @@ -140,7 +184,7 @@ class OperatorWithKernel : public OperatorBase { void Run(const ScopePtr& scope, const platform::DeviceContext& dev_ctx) const final { auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); - opKernel->Compute(OpKernel::KernelContext(this, scope, dev_ctx)); + opKernel->Compute(KernelContext(this, scope, dev_ctx)); } static std::unordered_map& @@ -148,6 +192,7 @@ class OperatorWithKernel : public OperatorBase { static std::unordered_map g_all_op_kernels; return g_all_op_kernels; } + void InferShape(const std::shared_ptr& scope) const final { std::vector ins; VarNamesToTensors(scope, inputs_, &ins); diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index 19ac4ecafa..6fa110f94c 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -30,7 +30,6 @@ class OpWithoutKernelTest : public OperatorBase { op_run_num++; ASSERT_EQ((int)inputs_.size(), 1); ASSERT_EQ((int)outputs_.size(), 1); - ASSERT_NEAR(GetAttr("scale"), 3.14, 1e-5); ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr); ASSERT_EQ(x, 1); ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr); @@ -86,9 +85,11 @@ class OpKernelTestProtoAndCheckerMaker : public 
OpProtoAndCheckerMaker { public: OpKernelTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("input", "input of test op"); - AddOutput("output", "output of test op"); - AddAttr("scale", "scale of cosine op"); + AddInput("x", "input of test op"); + AddOutput("y", "output of test op"); + AddAttr("scale", "scale of cosine op") + .SetDefault(1.0) + .LargerThan(0.0); AddComment("This is test op"); } }; @@ -103,11 +104,65 @@ class OpWithKernelTest : public OperatorWithKernel { class CPUKernelTest : public OpKernel { public: - void Compute(const KernelContext& context) const { + void Compute(const KernelContext& ctx) const { + std::cout << "this is cpu kernel" << std::endl; + std::cout << ctx.op_.DebugString() << std::endl; cpu_kernel_run_num++; - ASSERT_EQ((int)context.op_.inputs_.size(), 1); - ASSERT_EQ((int)context.op_.outputs_.size(), 1); - ASSERT_NEAR(context.op_.GetAttr("scale"), 3.14, 1e-5); + ASSERT_EQ(ctx.op_.Input("x"), "IN1"); + ASSERT_EQ(ctx.op_.Output("y"), "OUT1"); + } +}; + +// multiple inputs test +class OperatorMultiInputsTest : public OperatorBase { + public: + void Init() override { x = 1; } + void InferShape(const std::shared_ptr& scope) const override {} + void Run(const std::shared_ptr& scope, + const platform::DeviceContext& dev_ctx) const override { + ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr); + ASSERT_EQ(x, 1); + ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr); + ASSERT_EQ(Input("x"), "IN1"); + ASSERT_EQ(Input("y"), "OUT1"); + } + + public: + float x = 0; +}; + +class OpKernelTestMultiInputsProtoAndCheckerMaker + : public OpProtoAndCheckerMaker { + public: + OpKernelTestMultiInputsProtoAndCheckerMaker(OpProto* proto, + OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInputs("xs", "inputs of test op"); + AddInput("k", "input of test op"); + AddOutputs("ys", "outputs of test op"); + AddAttr("scale", "scale of cosine op") + .SetDefault(1.0) + .LargerThan(0.0); + AddComment("This is test op"); + } +}; + +class CPUKernalMultiInputsTest : public OpKernel { + public: + void Compute(const KernelContext& ctx) const { + auto xs = ctx.op_.Inputs("xs"); + ASSERT_EQ(xs.size(), 3UL); + ASSERT_EQ(xs[0], "x0"); + ASSERT_EQ(xs[1], "x1"); + ASSERT_EQ(xs[2], "x2"); + + auto k = ctx.op_.Input("k"); + ASSERT_EQ(k, "k0"); + + auto ys = ctx.op_.Outputs("ys"); + ASSERT_EQ(ys.size(), 2UL); + ASSERT_EQ(ys[0], "y0"); + ASSERT_EQ(ys[1], "y1"); } }; @@ -118,6 +173,7 @@ REGISTER_OP(op_with_kernel, paddle::framework::OpWithKernelTest, paddle::framework::OpKernelTestProtoAndCheckerMaker); REGISTER_OP_CPU_KERNEL(op_with_kernel, paddle::framework::CPUKernelTest); +// test with single input TEST(OpKernel, all) { paddle::framework::OpDesc op_desc; op_desc.set_type("op_with_kernel"); @@ -137,3 +193,47 @@ TEST(OpKernel, all) { op->Run(scope, cpu_device_context); ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 1); } + +REGISTER_OP(op_multi_inputs_with_kernel, paddle::framework::OpWithKernelTest, + paddle::framework::OpKernelTestMultiInputsProtoAndCheckerMaker); +REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel, + paddle::framework::CPUKernalMultiInputsTest); + +// test with multi inputs +TEST(OpKernel, multi_inputs) { + using namespace paddle::framework; + + OpDesc op_desc; + op_desc.set_type("op_multi_inputs_with_kernel"); + *op_desc.mutable_inputs()->Add() = "x0"; + *op_desc.mutable_inputs()->Add() = "x1"; + *op_desc.mutable_inputs()->Add() = "x2"; + *op_desc.mutable_inputs()->Add() = "k0"; 
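+  // The four inputs above span two proto arguments: x0..x2 form the
+  // repeated "xs" input and k0 is the single "k" input; the input_format
+  // attribute added below records their offsets as {0, 3, 4}.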
+ *op_desc.mutable_outputs()->Add() = "y0"; + *op_desc.mutable_outputs()->Add() = "y1"; + auto attr = op_desc.mutable_attrs()->Add(); + attr->set_name("scale"); + attr->set_type(paddle::framework::AttrType::FLOAT); + attr->set_f(3.14); + + auto attr0 = op_desc.mutable_attrs()->Add(); + attr0->set_name("input_format"); + attr0->set_type(paddle::framework::AttrType::INTS); + auto input_format = attr0->mutable_ints(); + input_format->Add(0); // x0 + input_format->Add(3); // k + input_format->Add(4); // end + + auto attr1 = op_desc.mutable_attrs()->Add(); + attr1->set_name("output_format"); + attr1->set_type(paddle::framework::AttrType::INTS); + auto output_format = attr1->mutable_ints(); + output_format->Add(0); // y0 + output_format->Add(2); // y1 + + paddle::platform::CPUDeviceContext cpu_device_context; + auto scope = std::make_shared(); + + OperatorPtr op(paddle::framework::OpRegistry::CreateOp(op_desc)); + op->Run(scope, cpu_device_context); +} diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index 17d459dbc8..000564f66d 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -8,10 +8,10 @@ namespace operators { template class AddKernel : public framework::OpKernel { public: - void Compute(const KernelContext &context) const override { + void Compute(const framework::KernelContext &context) const override { LOG(INFO) << "Add kernel in " << typeid(Place).name(); } }; -} // namespace op +} // namespace operators } // namespace paddle From 2a03e3808d48257a71366f5802aeec052914e1cc Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 17 Jul 2017 16:45:42 +0800 Subject: [PATCH 32/50] set correct place for output tensor --- paddle/framework/operator.cc | 4 ++-- paddle/framework/operator.h | 4 +++- paddle/operators/add_op.h | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 946bde5734..1a7e332227 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -18,14 +18,14 @@ namespace paddle { namespace framework { template <> -Eigen::DefaultDevice* OpKernel::KernelContext::get_eigen_device< +Eigen::DefaultDevice* OpKernel::KernelContext::GetEigenDevice< platform::CPUPlace, Eigen::DefaultDevice>() const { return device_context_.get_eigen_device(); } #ifndef PADDLE_ONLY_CPU template <> -Eigen::GpuDevice* OpKernel::KernelContext::get_eigen_device< +Eigen::GpuDevice* OpKernel::KernelContext::GetEigenDevice< platform::GPUPlace, Eigen::GpuDevice>() const { return device_context_.get_eigen_device(); } diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index e6cae9c32b..b8c5098e49 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -109,7 +109,9 @@ class OpKernel { template ::EigenDeviceType> - DeviceType* get_eigen_device() const; + DeviceType* GetEigenDevice() const; + + platform::Place GetPlace() const { return device_context_.GetPlace(); } const OperatorBase& op_; const ScopePtr& scope_; diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index e8c718669a..e9a793d23b 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -27,9 +27,9 @@ public: auto input1 = context.Input(1)->Get(); auto* output = context.Output(0)->GetMutable(); - output->mutable_data(Place()); + output->mutable_data(context.GetPlace()); - output->flat().device(*(context.get_eigen_device())) = + output->flat().device(*(context.GetEigenDevice())) = input0.flat() + input1.flat(); } }; From 
5847b96a61fec031555f245702f604e928f8ad4e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 17 Jul 2017 18:30:40 +0800 Subject: [PATCH 33/50] Follow comments, correct implement `DDim::size()` * Also fix unit test --- paddle/framework/ddim.cc | 2 +- paddle/operators/softmax_op.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index f3dd396613..eb1a18ee40 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -278,7 +278,7 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) { return os; } -ssize_t DDim::size() const { return product(*this); } +ssize_t DDim::size() const { return arity(*this); } DDim::DDim(std::initializer_list init_list) { *this = make_ddim(init_list); diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu index 59f32b35cf..903eef1b62 100644 --- a/paddle/operators/softmax_op.cu +++ b/paddle/operators/softmax_op.cu @@ -1,5 +1,5 @@ #include -#include +#include REGISTER_OP_GPU_KERNEL( softmax, paddle::operators::SoftmaxKernel); From 73a9f0f25d86b46fa74fc574e2f443d644bcfb88 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 17 Jul 2017 19:44:12 +0800 Subject: [PATCH 34/50] Merge develop --- paddle/operators/mul_op.h | 3 ++- paddle/operators/rowwise_add_op.h | 2 +- paddle/operators/sigmoid_op.h | 2 +- paddle/operators/softmax_op.h | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h index ed8d26e136..ce8a0169e0 100644 --- a/paddle/operators/mul_op.h +++ b/paddle/operators/mul_op.h @@ -13,6 +13,7 @@ limitations under the License. */ #pragma once + #include #include @@ -22,7 +23,7 @@ namespace operators { template class MulKernel : public framework::OpKernel { public: - void Compute(const KernelContext &context) const override { + void Compute(const framework::KernelContext &context) const override { LOG(INFO) << "Mul kernel in " << typeid(Place).name(); } }; diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index 3dfde93ba2..35f43e6376 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -22,7 +22,7 @@ namespace operators { template class RowWiseAddKernel : public framework::OpKernel { public: - void Compute(const KernelContext &context) const override { + void Compute(const framework::KernelContext &context) const override { LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name(); } }; diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h index 191aa42e4a..42173343f3 100644 --- a/paddle/operators/sigmoid_op.h +++ b/paddle/operators/sigmoid_op.h @@ -23,7 +23,7 @@ namespace operators { template class SigmoidKernel : public framework::OpKernel { public: - void Compute(const KernelContext &context) const override { + void Compute(const framework::KernelContext &context) const override { LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name(); } }; diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index fe97c9aafe..74e9e2786b 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -23,7 +23,7 @@ namespace operators { template class SoftmaxKernel : public framework::OpKernel { public: - void Compute(const KernelContext &context) const override { + void Compute(const framework::KernelContext &context) const override { LOG(INFO) << "Softmax kernel in " << typeid(Place).name(); } }; From 78bd815e8504496ccae388bb799cc8026427084c Mon Sep 17 00:00:00 2001 
From: fengjiayi
Date: Mon, 17 Jul 2017 19:48:33 +0800
Subject: [PATCH 35/50] refine conditional compilation and remove `numel_`

---
 paddle/framework/tensor.h | 40 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 29bad7a00a..b405e3877c 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -27,7 +27,7 @@ namespace framework {

 class Tensor {
  public:
-  Tensor() : numel_(0), offset_(0) {}
+  Tensor() : offset_(0) {}

   template <typename T>
   const T* data() const {
@@ -44,30 +44,26 @@ class Tensor {

   template <typename T>
   T* mutable_data(platform::Place place) {
-    PADDLE_ENFORCE(numel_ > 0,
-                   "Tensor::numel_ must be larger than zero to call "
+    PADDLE_ENFORCE(product(dims_) > 0,
+                   "Tensor's numel must be larger than zero to call "
                    "Tensor::mutable_data. Call Tensor::set_dim first.");
     if (holder_ == nullptr ||
         !(holder_->place() ==
           place) /* some versions of boost::variant don't have operator!= */
-        || holder_->size() < numel_ * sizeof(T) + offset_) {
+        || holder_->size() < product(dims_) * sizeof(T) + offset_) {
+      if (platform::is_cpu_place(place)) {
+        holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
+            boost::get<platform::CPUPlace>(place), product(dims_) * sizeof(T)));
+      } else if (platform::is_gpu_place(place)) {
 #ifdef __CUDACC__
-      switch (place.which()) {
-        case 0:
-          holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
-              boost::get<platform::CPUPlace>(place), numel_ * sizeof(T)));
-          break;
-
-        case 1:
-          holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
-              boost::get<platform::GPUPlace>(place), numel_ * sizeof(T)));
-          break;
-      }
+        holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
+            boost::get<platform::GPUPlace>(place), product(dims_) * sizeof(T)));
 #else
-      holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
-          boost::get<platform::CPUPlace>(place), numel_ * sizeof(T)));
+        PADDLE_ENFORCE(false, "'GPUPlace' is not supported in CPU only device.");
 #endif
-
+      } else {
+        PADDLE_ENFORCE(false, "Unknown 'place'.");
+      }
       offset_ = 0;
     }
     return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
@@ -88,7 +84,7 @@ class Tensor {
         platform::is_cpu_place(dst_place),
         "Tensor::CopyFrom only supports CPU now.");
     src.CheckDims();
-    size_t size = src.numel_ * sizeof(T);
+    size_t size = product(src.dims_) * sizeof(T);
     set_dims(src.dims());
     const void* src_ptr = static_cast<const void*>(src.data<T>());
     void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
@@ -122,7 +118,6 @@ class Tensor {
       return;
     }
     dims_ = dims;
-    numel_ = product(dims_);
   }

   DDim dims() const { return dims_; }
@@ -170,16 +165,15 @@ class Tensor {
   inline void CheckDims() const {
     PADDLE_ENFORCE(holder_ != nullptr,
                    "Tensor holds no memory. Call Tensor::mutable_data first.");
-    PADDLE_ENFORCE(holder_->size() >= numel_ * sizeof(T) + offset_,
+    PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
                    "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
                    "first to re-allocate memory.");
   }

   std::shared_ptr<Placeholder> holder_;  // holds the memory block if allocated.
   DDim dims_;
-  size_t numel_;   // cache of `product(dims_)`
   size_t offset_;  // marks the beginning of the tensor data area.
-}; // namespace framework +}; } // namespace framework } // namespace paddle From 78fa5e307da3cb32706f396346d3db7a875b4178 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 17 Jul 2017 20:00:58 +0800 Subject: [PATCH 36/50] Add DDim::size() --- paddle/framework/ddim.cc | 2 ++ paddle/framework/ddim.h | 2 ++ paddle/framework/ddim_test.cc | 1 + 3 files changed, 5 insertions(+) diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 73f5499ad1..b6ad8b60aa 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -117,6 +117,8 @@ int DDim::operator[](int idx) const { return boost::apply_visitor(DynamicConstIndexer(idx), var); } +ssize_t DDim::size() const { return arity(*this); } + bool DDim::operator==(DDim d) const { if (var.which() != d.getVar().which()) { return false; diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index a0c2a8a74a..7bc21a1e34 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -50,6 +50,8 @@ struct DDim { DDimVar getVar() { return var; } + ssize_t size() const; + bool operator==(DDim d) const; bool operator!=(DDim d) const; diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index 6a099f2aeb..9d18a2972c 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -49,6 +49,7 @@ TEST(DDim, Equality) { // arity of a DDim EXPECT_EQ(paddle::framework::arity(ddim), 3); + EXPECT_EQ(ddim.size(), 3); // product of a DDim EXPECT_EQ(paddle::framework::product(vddim), 45); From 122e83e36cee629cf3e8c5b0e6222b2160437769 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 17 Jul 2017 21:09:57 +0800 Subject: [PATCH 37/50] Fix unittest --- paddle/operators/rowwise_add_op.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu index 95e29d1fa3..2c4bfbf93a 100644 --- a/paddle/operators/rowwise_add_op.cu +++ b/paddle/operators/rowwise_add_op.cu @@ -2,4 +2,5 @@ #include REGISTER_OP_GPU_KERNEL( - mul, paddle::operators::RowWiseAddKernel); + rowwise_add, + paddle::operators::RowWiseAddKernel); From bde90be71bc2758b464960c8e2631ee177c1d9a7 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 17 Jul 2017 18:10:18 +0800 Subject: [PATCH 38/50] Read/Write a Tensor Python Basically following http://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html * Use buffer protocol to return a view of Tensor. It can be cast to numpy array in Python. * Set a numpy array to a tensor. --- paddle/framework/tensor.h | 9 +- paddle/pybind/pybind.cc | 142 +++++++++++++++++- .../paddle/v2/framework/tests/test_tensor.py | 45 ++++++ 3 files changed, 194 insertions(+), 2 deletions(-) create mode 100644 python/paddle/v2/framework/tests/test_tensor.py diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 29bad7a00a..891cf73641 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include +#include #include "paddle/framework/ddim.h" #include "paddle/framework/enforce.h" #include "paddle/memory/memory.h" @@ -127,6 +128,10 @@ class Tensor { DDim dims() const { return dims_; } + platform::Place place() const { return holder_->place(); } + + std::type_index type() const { return holder_->type(); } + private: // Placeholder hides type T, so it doesn't appear as a template // parameter of Variable. 
@@ -135,6 +140,7 @@ class Tensor { virtual void* ptr() const = 0; virtual platform::Place place() const = 0; virtual size_t size() const = 0; + virtual std::type_index type() const = 0; }; template @@ -159,7 +165,8 @@ class Tensor { virtual void* ptr() const { return static_cast(ptr_.get()); } virtual size_t size() const { return size_; } - virtual platform::Place place() const { return place_; } + virtual paddle::platform::Place place() const { return place_; } + virtual std::type_index type() const { return std::type_index(typeid(T)); } std::unique_ptr> ptr_; platform::Place place_; // record the place of ptr_. diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index b5ead21fd0..8222323e36 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include +#include #include #include #include @@ -25,9 +26,143 @@ namespace pd = paddle::framework; USE_OP(add_two); +struct PlaceDebugString : public boost::static_visitor { + std::string operator()(const paddle::platform::GPUPlace& place) const { + return "GPU(" + std::to_string(place.device) + ")"; + } + + std::string operator()(const paddle::platform::CPUPlace& place) const { + return "CPU"; + } +}; + +template +struct TensorToPyBuffer { + pd::Tensor& self_; + explicit TensorToPyBuffer(pd::Tensor& self) : self_(self) {} + + bool CanCast() const { return std::type_index(typeid(T)) == self_.type(); } + + py::buffer_info Cast() const { + auto dim_vec = pd::vectorize(self_.dims()); + std::vector dims_outside; + std::vector strides; + dims_outside.resize(dim_vec.size()); + strides.resize(dim_vec.size()); + + size_t prod = 1; + for (size_t i = dim_vec.size(); i != 0; --i) { + dims_outside[i - 1] = (size_t)dim_vec[i - 1]; + strides[i - 1] = sizeof(float) * prod; + prod *= dims_outside[i - 1]; + } + + return py::buffer_info(self_.mutable_data(self_.place()), + sizeof(T), + py::format_descriptor::format(), + (size_t)pd::arity(self_.dims()), + dims_outside, + strides); + } +}; + +template +struct CastToPyBufferImpl; + +template +struct CastToPyBufferImpl { + py::buffer_info operator()(pd::Tensor& tensor) { + PADDLE_THROW("This type of tensor cannot be expose to Python"); + return py::buffer_info(); + } +}; + +template +struct CastToPyBufferImpl { + using CUR_TYPE = typename std::tuple_element>::type; + py::buffer_info operator()(pd::Tensor& tensor) { + TensorToPyBuffer cast_object(tensor); + if (cast_object.CanCast()) { + return cast_object.Cast(); + } else { + constexpr bool less = I + 1 < std::tuple_size>::value; + return CastToPyBufferImpl()(tensor); + } + } +}; + +template +std::ostream& operator<<(std::ostream& os, const std::vector& vec) { + for (size_t i = 0; i < vec.size(); ++i) { + os << vec[i]; + if (i + 1 != vec.size()) { + os << ", "; + } + } + return os; +} + +py::buffer_info CastToPyBuffer(pd::Tensor& tensor) { + auto buffer_info = CastToPyBufferImpl()(tensor); + return buffer_info; +} + +template +void PyTensorSet( + pd::Tensor& self, + py::array_t array) { + std::vector dims; + dims.reserve(array.ndim()); + for (size_t i = 0; i < array.ndim(); ++i) { + dims.push_back((int)array.shape()[i]); + } + + self.set_dims(pd::make_ddim(dims)); + auto* dst = self.mutable_data(paddle::platform::CPUPlace()); + std::memcpy(dst, array.data(), sizeof(T) * array.size()); +} + PYBIND11_PLUGIN(core) { py::module m("core", "C++ core of Paddle Paddle"); + py::class_( + m, "Place", R"DOC(Device Place Class.)DOC") + .def("__str__", + [](const paddle::platform::Place& 
self) { + return boost::apply_visitor(PlaceDebugString(), self); + }) + .def("is_gpu", + [](const paddle::platform::Place& self) { + return paddle::platform::is_gpu_place(self); + }) + .def("is_cpu", [](const paddle::platform::Place& self) { + return paddle::platform::is_cpu_place(self); + }); + + py::class_(m, "Tensor", py::buffer_protocol()) + .def("get_place", &pd::Tensor::place) + .def_buffer([](pd::Tensor& self) -> py::buffer_info { + PADDLE_ENFORCE(paddle::platform::is_cpu_place(self.place()), + "Only CPU tensor can cast to numpy array"); + return CastToPyBuffer(self); + }) + .def("get_dims", + [](const pd::Tensor& self) { return pd::vectorize(self.dims()); }) + .def("set_dims", + [](pd::Tensor& self, const std::vector& dim) { + self.set_dims(pd::make_ddim(dim)); + }) + .def("alloc_float", + [](pd::Tensor& self) { + self.mutable_data(paddle::platform::CPUPlace()); + }) + .def("alloc_int", + [](pd::Tensor& self) { + self.mutable_data(paddle::platform::CPUPlace()); + }) + .def("set", PyTensorSet) + .def("set", PyTensorSet); + py::class_(m, "Variable", R"DOC(Variable Class. All parameter, weight, gradient are variables in Paddle. @@ -38,7 +173,12 @@ All parameter, weight, gradient are variables in Paddle. *var.GetMutable() = val; }) .def("get_int", - [](const pd::Variable& var) -> int { return var.Get(); }); + [](const pd::Variable& var) -> int { return var.Get(); }) + .def("get_tensor", + [](pd::Variable& self) -> pd::Tensor* { + return self.GetMutable(); + }, + py::return_value_policy::reference); py::class_>(m, "Scope") .def(py::init&>()) diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/framework/tests/test_tensor.py new file mode 100644 index 0000000000..b72aff3b9c --- /dev/null +++ b/python/paddle/v2/framework/tests/test_tensor.py @@ -0,0 +1,45 @@ +import paddle.v2.framework.core as core +import unittest +import numpy + + +class TestScope(unittest.TestCase): + def test_int_tensor(self): + scope = core.Scope(None) + var = scope.create_var("test_tensor") + tensor = var.get_tensor() + + tensor.set_dims([1000, 784]) + tensor.alloc_int() + + tensor_array = numpy.array(tensor) + self.assertEqual((1000, 784), tensor_array.shape) + tensor_array[3, 9] = 1 + tensor_array[19, 11] = 2 + tensor.set(tensor_array) + + tensor_array_2 = numpy.array(tensor) + self.assertEqual(1.0, tensor_array_2[3, 9]) + self.assertEqual(2.0, tensor_array_2[19, 11]) + + def test_float_tensor(self): + scope = core.Scope(None) + var = scope.create_var("test_tensor") + tensor = var.get_tensor() + + tensor.set_dims([1000, 784]) + tensor.alloc_float() + + tensor_array = numpy.array(tensor) + self.assertEqual((1000, 784), tensor_array.shape) + tensor_array[3, 9] = 1.0 + tensor_array[19, 11] = 2.0 + tensor.set(tensor_array) + + tensor_array_2 = numpy.array(tensor) + self.assertAlmostEqual(1.0, tensor_array_2[3, 9]) + self.assertAlmostEqual(2.0, tensor_array_2[19, 11]) + + +if __name__ == '__main__': + unittest.main() From 2b1cac4113690f4090cdde2a57afb905b2804843 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 14 Jul 2017 21:49:30 +0000 Subject: [PATCH 39/50] Handle all unchecked errors Unchecked errors could be handled by: cd go; gometalinter --vendor --disable-all --enable errcheck $(glide nv) --- go/master/client.go | 5 +++- go/master/client_internal_test.go | 22 ++++++++++++++--- go/master/client_test.go | 24 +++++++++++++++--- go/pserver/client/client.go | 2 +- go/pserver/client/client_test.go | 28 +++++++++++++++++---- go/pserver/service.go | 41 +++++++++++++++++++++++-------- 6 
files changed, 97 insertions(+), 25 deletions(-) diff --git a/go/master/client.go b/go/master/client.go index de883bf4b9..90b9947097 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -69,7 +69,10 @@ func (c *Client) getRecords() { // We treat a task as finished whenever the last data // instance of the task is read. This is not exactly // correct, but a reasonable approximation. - c.taskFinished(t.Meta.ID) + err = c.taskFinished(t.Meta.ID) + if err != nil { + log.Errorln(err) + } } } diff --git a/go/master/client_internal_test.go b/go/master/client_internal_test.go index 49263474c8..70dc09bf94 100644 --- a/go/master/client_internal_test.go +++ b/go/master/client_internal_test.go @@ -66,11 +66,21 @@ func TestGetFinishTask(t *testing.T) { for i := 0; i < totalTask*chunkPerTask; i++ { w := recordio.NewWriter(f, -1, -1) - w.Write(nil) + _, err = w.Write(nil) + if err != nil { + panic(err) + } + // call Close to force RecordIO writing a chunk. - w.Close() + err = w.Close() + if err != nil { + panic(err) + } + } + err = f.Close() + if err != nil { + panic(err) } - f.Close() // Manually intialize client to avoid calling c.getRecords() c := &Client{} @@ -79,7 +89,11 @@ func TestGetFinishTask(t *testing.T) { ch := make(chan string, 1) ch <- addr go c.monitorMaster(ch) - c.SetDataset([]string{path}) + err = c.SetDataset([]string{path}) + if err != nil { + panic(err) + } + checkOnePass := func(i int) { var tasks []Task for idx := 0; idx < totalTask; idx++ { diff --git a/go/master/client_test.go b/go/master/client_test.go index 6666d3860c..bc92dc5ac9 100644 --- a/go/master/client_test.go +++ b/go/master/client_test.go @@ -57,14 +57,30 @@ func TestNextRecord(t *testing.T) { w := recordio.NewWriter(f, -1, -1) for i := 0; i < total; i++ { - w.Write([]byte{byte(i)}) + _, err = w.Write([]byte{byte(i)}) + if err != nil { + panic(err) + } + } + + err = w.Close() + if err != nil { + panic(err) + } + + err = f.Close() + if err != nil { + panic(err) } - w.Close() - f.Close() + curAddr := make(chan string, 1) curAddr <- fmt.Sprintf(":%d", p) c := master.NewClient(curAddr, 10) - c.SetDataset([]string{path}) + err = c.SetDataset([]string{path}) + if err != nil { + panic(err) + } + for pass := 0; pass < 50; pass++ { received := make(map[byte]bool) for i := 0; i < total; i++ { diff --git a/go/pserver/client/client.go b/go/pserver/client/client.go index aa8bfe30c2..b4a45e1c21 100644 --- a/go/pserver/client/client.go +++ b/go/pserver/client/client.go @@ -233,7 +233,7 @@ func (c *Client) Save(path string) error { func strHash(s string) uint32 { h := fnv.New32a() - h.Write([]byte(s)) + _, _ = h.Write([]byte(s)) return h.Sum32() } diff --git a/go/pserver/client/client_test.go b/go/pserver/client/client_test.go index aab91556b4..5c89882a29 100644 --- a/go/pserver/client/client_test.go +++ b/go/pserver/client/client_test.go @@ -79,15 +79,33 @@ func initEtcdClient() { log.Errorf("err %v", err) } ctx, cancel := context.WithTimeout(context.Background(), timeout) - client.Delete(ctx, pserver.PsDesired) - client.Delete(ctx, pserver.PsPath) - client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver)) + _, err = client.Delete(ctx, pserver.PsDesired) + if err != nil { + panic(err) + } + + _, err = client.Delete(ctx, pserver.PsPath) + if err != nil { + panic(err) + } + + _, err = client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver)) + if err != nil { + panic(err) + } + ports := initClient() for i := 0; i < numPserver; i++ { - client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i])) + _, err = 
client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i])) + if err != nil { + panic(err) + } } cancel() - client.Close() + err = client.Close() + if err != nil { + panic(err) + } } type selector bool diff --git a/go/pserver/service.go b/go/pserver/service.go index fec2ec61dc..5cb0293b97 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -219,7 +219,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { } // pserver save checkpoint -func (s *Service) doCheckpoint() error { +func (s *Service) doCheckpoint() (err error) { <-s.initialized s.mu.Lock() defer s.mu.Unlock() @@ -237,9 +237,9 @@ func (s *Service) doCheckpoint() error { } var buf bytes.Buffer encoder := gob.NewEncoder(&buf) - err := encoder.Encode(cp) + err = encoder.Encode(cp) if err != nil { - return err + return } cpMeta := checkpointMeta{} @@ -248,10 +248,14 @@ func (s *Service) doCheckpoint() error { h := md5.New() cpMeta.MD5 = hex.EncodeToString(h.Sum(buf.Bytes())) - cpMetajson, _ := json.Marshal(cpMeta) + cpMetajson, err := json.Marshal(cpMeta) + if err != nil { + return + } + err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3*time.Second) if err != nil { - return err + return } if _, err = os.Stat(cpMeta.UUID); os.IsNotExist(err) { log.Info("checkpoint does not exists.") @@ -264,15 +268,32 @@ func (s *Service) doCheckpoint() error { } } f, err := os.Create(cpMeta.UUID) - defer f.Close() if err != nil { - return err + return } + + defer func() { + closeErr := f.Close() + if closeErr != nil { + if err != nil { + log.Errorln(closeErr) + } else { + // Set closeErr as return value. + err = closeErr + } + } + }() + writer := bufio.NewWriter(f) _, err = writer.Write(buf.Bytes()) - writer.Flush() if err != nil { - return err + return } - return nil + + err = writer.Flush() + if err != nil { + return + } + + return } From 065e5666ed6d87e7736c26d795daf0bc2b6efb2a Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 14 Jul 2017 22:32:55 +0000 Subject: [PATCH 40/50] add gometalinter/errcheck into pre-commit --- .pre-commit-config.yaml | 10 ++++++---- .travis.yml | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 61b989dc69..44174d3558 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,10 +21,12 @@ sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29 hooks: - id: clang-formater -- repo: https://github.com/dnephin/pre-commit-golang - sha: e4693a4c282b4fc878eda172a929f7a6508e7d16 +- repo: https://github.com/PaddlePaddle/pre-commit-golang + sha: 6bce8cc8a6ce601bcf6feccf6bfbd43fe04ccbeb hooks: - id: go-fmt - files: (.*\.go) + types: [go] - id: go-lint - files: (.*\.go) + types: [go] + - id: gometalinter + types: [go] diff --git a/.travis.yml b/.travis.yml index 2cf7666fb5..376c693602 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,6 +41,8 @@ before_install: - pip install rarfile - curl https://glide.sh/get | bash - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" + - go get -u github.com/alecthomas/gometalinter + - gometalinter --install - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: From 5d7bccb2a38cb09a2cb90781084cfbd58839cf63 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 14 Jul 2017 23:09:53 +0000 Subject: [PATCH 41/50] fix golint errors --- go/pserver/client/c/cclient.go | 12 ++++++------ go/pserver/client/etcd_client.go | 15 +++++++-------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/go/pserver/client/c/cclient.go 
b/go/pserver/client/c/cclient.go index 7ddaceb7ed..d307c92983 100644 --- a/go/pserver/client/c/cclient.go +++ b/go/pserver/client/c/cclient.go @@ -101,11 +101,11 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli } //export paddle_new_etcd_pserver_client -func paddle_new_etcd_pserver_client(etcd_endpoints *C.char, selected int) C.paddle_pserver_client { +func paddle_new_etcd_pserver_client(etcdEndpoints *C.char, selected int) C.paddle_pserver_client { // TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters) - addr := C.GoString(etcd_endpoints) - etcd_client := client.NewEtcd(addr) - c := client.NewClient(etcd_client, etcd_client.Desired(), selector(selected != 0)) + addr := C.GoString(etcdEndpoints) + etcdClient := client.NewEtcd(addr) + c := client.NewClient(etcdClient, etcdClient.Desired(), selector(selected != 0)) return add(c) } @@ -124,13 +124,13 @@ func paddle_begin_init_params(client C.paddle_pserver_client) C.int { } //export paddle_init_param -func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { +func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, paramConfig unsafe.Pointer, configLen C.int) C.int { et := pserver.ElementType(param.element_type) name := C.GoString(param.name) content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len)) pc := pserver.ParameterWithConfig{ Param: pserver.Parameter{Name: name, ElementType: et, Content: content}, - Config: cArrayToSlice(param_config, int(config_len)), + Config: cArrayToSlice(paramConfig, int(configLen)), } c := get(client) err := c.InitParam(pc) diff --git a/go/pserver/client/etcd_client.go b/go/pserver/client/etcd_client.go index 8eb2a4f451..953065b427 100644 --- a/go/pserver/client/etcd_client.go +++ b/go/pserver/client/etcd_client.go @@ -12,8 +12,7 @@ import ( ) const ( - // DefaultEtcdTimeout is the default etcd timeout - DefaultEtcdTimeout time.Duration = 5 * time.Second + defaultEtcdTimeout time.Duration = 5 * time.Second ) // EtcdClient is used by pserver client that is a part of trainer process. 
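// It reads the desired pserver count from the PsDesired key and discovers
// the pserver addresses registered under PsPath.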
@@ -48,7 +47,7 @@ func (p *EtcdClient) Desired() int { psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value)) if err != nil { - log.Errorf("psDesired %s invalid %v", psDesired, err) + log.Errorf("psDesired %d invalid %v", psDesired, err) time.Sleep(p.timeout) continue } @@ -67,12 +66,12 @@ func (p *EtcdClient) List() []Server { for { for i := 0; i < psDesired; i++ { ctx, cancel := context.WithTimeout(context.Background(), p.timeout) + cancel() psKey := pserver.PsPath + strconv.Itoa(i) log.Debugf("checking %s", psKey) resp, err := p.client.Get(ctx, psKey) - cancel() if err != nil { - log.Infof("Get psKey=%s error, %v", psKey, err) + log.Infof("Get psKey= %s error, %v", psKey, err) time.Sleep(p.timeout) continue } @@ -107,11 +106,11 @@ func NewEtcd(endpoints string) *EtcdClient { for { cli, err = clientv3.New(clientv3.Config{ Endpoints: ep, - DialTimeout: DefaultEtcdTimeout, + DialTimeout: defaultEtcdTimeout, }) if err != nil { log.Errorf("Init etcd connection failed: %v", err) - time.Sleep(DefaultEtcdTimeout) + time.Sleep(defaultEtcdTimeout) continue } break @@ -119,7 +118,7 @@ func NewEtcd(endpoints string) *EtcdClient { log.Infof("Connected to etcd: %s\n", endpoints) client := &EtcdClient{ client: cli, - timeout: DefaultEtcdTimeout, + timeout: defaultEtcdTimeout, endpoints: ep, } return client From 37624b30ff3b769fdd768c77d2cdd8b55f09481c Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Sat, 15 Jul 2017 00:01:12 +0000 Subject: [PATCH 42/50] Fix Go pre-commit --- .pre-commit-config.yaml | 4 +--- paddle/scripts/travis/check_style.sh | 5 +++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 44174d3558..b7179c26fe 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,11 +22,9 @@ hooks: - id: clang-formater - repo: https://github.com/PaddlePaddle/pre-commit-golang - sha: 6bce8cc8a6ce601bcf6feccf6bfbd43fe04ccbeb + sha: fb3ba0e9e38a516543925e96cef76740b61321ab hooks: - id: go-fmt types: [go] - - id: go-lint - types: [go] - id: gometalinter types: [go] diff --git a/paddle/scripts/travis/check_style.sh b/paddle/scripts/travis/check_style.sh index 4754bdd4c8..8049aeb7b0 100755 --- a/paddle/scripts/travis/check_style.sh +++ b/paddle/scripts/travis/check_style.sh @@ -13,6 +13,11 @@ export PATH=/usr/bin:$PATH pre-commit install clang-format --version +# set up go environment for running gometalinter +mkdir -p $GOPATH/src/github.com/PaddlePaddle/ +ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle +cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd - + if ! pre-commit run -a ; then git diff --exit-code fi From 25e57949cce1dd42ed8532a86712374af1bf8ea8 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Mon, 17 Jul 2017 21:04:12 +0000 Subject: [PATCH 43/50] add more linters, fix errors found by them. 
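
Most of the fixes that gometalinter and errcheck drive in this series reduce to one pattern: every returned error is handled or propagated, including the easy-to-miss error from a deferred Close. Below is a minimal, self-contained Go sketch of that pattern, mirroring the doCheckpoint change earlier in the series; the writeSnapshot helper, its path, and its payload are hypothetical illustrations, not code from this repository.

package main

import (
	"bufio"
	"log"
	"os"
)

// writeSnapshot mirrors the deferred-Close pattern: a failed Close is
// propagated through the named return value err, unless an earlier
// write error already occupies it.
func writeSnapshot(path string, data []byte) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return
	}

	defer func() {
		closeErr := f.Close()
		if closeErr != nil {
			if err != nil {
				// Keep the first error; just log the Close failure.
				log.Println(closeErr)
			} else {
				err = closeErr
			}
		}
	}()

	w := bufio.NewWriter(f)
	if _, err = w.Write(data); err != nil {
		return
	}
	return w.Flush()
}

func main() {
	if err := writeSnapshot("/tmp/snapshot.bin", []byte("pserver state")); err != nil {
		log.Fatal(err)
	}
}

Logging the Close failure instead of overwriting an earlier write error keeps the root cause visible to the caller.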
--- .pre-commit-config.yaml | 2 +- go/master/c/client.go | 5 ++--- go/master/etcd_client.go | 4 ++-- go/master/inmem_store.go | 2 +- go/master/service.go | 3 +-- go/pserver/client/c/cclient.go | 11 +++++------ go/pserver/etcd_client.go | 9 +++------ go/pserver/optimizer.go | 8 +++----- go/pserver/service.go | 2 +- 9 files changed, 19 insertions(+), 27 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b7179c26fe..efb4dcb2df 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ hooks: - id: clang-formater - repo: https://github.com/PaddlePaddle/pre-commit-golang - sha: fb3ba0e9e38a516543925e96cef76740b61321ab + sha: 16398aeccf263adaf53b2495eed0406347d76281 hooks: - id: go-fmt types: [go] diff --git a/go/master/c/client.go b/go/master/c/client.go index 31f4311974..2cbe164c7b 100644 --- a/go/master/c/client.go +++ b/go/master/c/client.go @@ -23,7 +23,6 @@ import ( log "github.com/sirupsen/logrus" ) -var nullPtr = unsafe.Pointer(uintptr(0)) var mu sync.Mutex var handleMap = make(map[C.paddle_master_client]*master.Client) var curHandle C.paddle_master_client @@ -114,13 +113,13 @@ func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int { if err != nil { // Error // TODO: return the type of error? - *record = (*C.uchar)(nullPtr) + *record = (*C.uchar)(nil) return -1 } if len(r) == 0 { // Empty record - *record = (*C.uchar)(nullPtr) + *record = (*C.uchar)(nil) return 0 } diff --git a/go/master/etcd_client.go b/go/master/etcd_client.go index 04c1394e96..69dc6a8268 100644 --- a/go/master/etcd_client.go +++ b/go/master/etcd_client.go @@ -30,7 +30,7 @@ type EtcdClient struct { // NewEtcdClient creates a new EtcdClient. func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) { log.Debugf("Connecting to etcd at %v", endpoints) - // TODO(helin): gracefully shutdown etcd store. Becuase etcd + // TODO(helin): gracefully shutdown etcd store. Because etcd // store holds a etcd lock, even though the lock will expire // when the lease timeout, we need to implement graceful // shutdown to release the lock. @@ -60,7 +60,7 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat } log.Debugf("Successfully acquired lock at %s.", lockPath) - put := clientv3.OpPut(addrPath, string(addr)) + put := clientv3.OpPut(addrPath, addr) resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit() if err != nil { return nil, err diff --git a/go/master/inmem_store.go b/go/master/inmem_store.go index bcd549b20e..57e75dc4e0 100644 --- a/go/master/inmem_store.go +++ b/go/master/inmem_store.go @@ -4,7 +4,7 @@ import "sync" // InMemStore is an in memory implementation of Store interface. // -// It does not tolerate the fault that casues the program to crash. +// It does not tolerate the fault that causes the program to crash. type InMemStore struct { mu sync.Mutex buf []byte diff --git a/go/master/service.go b/go/master/service.go index 9cef2270ce..262735f421 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -160,7 +160,7 @@ func (s *Service) recover() (bool, error) { // snapshot *must* be called with s.mu being held. func (s *Service) snapshot() error { - // TOOD(helin): etcd request has a size limit, so the snapshot + // TODO(helin): etcd request has a size limit, so the snapshot // size is limited by the max request size. 
We should either // divide the snapshot into smaller chunks and save under // different keys, or configure the request size to be big @@ -289,7 +289,6 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) { log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure) s.taskQueues.Todo = append(s.taskQueues.Todo, t) - return } func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { diff --git a/go/pserver/client/c/cclient.go b/go/pserver/client/c/cclient.go index d307c92983..718b4304c8 100644 --- a/go/pserver/client/c/cclient.go +++ b/go/pserver/client/c/cclient.go @@ -34,7 +34,6 @@ import ( log "github.com/sirupsen/logrus" ) -var nullPtr = unsafe.Pointer(uintptr(0)) var mu sync.Mutex var handleMap = make(map[C.paddle_pserver_client]*client.Client) var curHandle C.paddle_pserver_client @@ -63,7 +62,7 @@ func remove(client C.paddle_pserver_client) *client.Client { } func cArrayToSlice(p unsafe.Pointer, len int) []byte { - if p == nullPtr { + if p == nil { return nil } @@ -137,7 +136,7 @@ func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, if err != nil { if err.Error() == pserver.AlreadyInitialized { - log.Warningf("parameter %s already initialized, treat paddle_init_param as sucessful.", name) + log.Warningf("parameter %s already initialized, treat paddle_init_param as successful.", name) return C.PSERVER_OK } log.Errorln(err) @@ -153,7 +152,7 @@ func paddle_finish_init_params(client C.paddle_pserver_client) C.int { err := c.FinishInitParams() if err != nil { if err.Error() == pserver.AlreadyInitialized { - log.Warningln("parameters already initialized, treat paddle_finish_init_params as sucessful.") + log.Warningln("parameters already initialized, treat paddle_finish_init_params as successful.") return C.PSERVER_OK } @@ -223,12 +222,12 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, p := ps[i] param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst)))) - if unsafe.Pointer(param) == nullPtr { + if unsafe.Pointer(param) == nil { log.Errorln("must pre-allocate parameter.") return C.PSERVER_ERROR } - if unsafe.Pointer(param.content) != nullPtr { + if unsafe.Pointer(param.content) != nil { if int(param.content_len) != len(p.Content) { log.Errorf("the pre-allocated content len does not match parameter content len. 
Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content))
 			return C.PSERVER_ERROR
diff --git a/go/pserver/etcd_client.go b/go/pserver/etcd_client.go
index 66af4fa0b4..e70e826975 100644
--- a/go/pserver/etcd_client.go
+++ b/go/pserver/etcd_client.go
@@ -177,10 +177,10 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, er
 				break
 			}
 		}
-		if registered == true {
+		if registered {
 			return nil
 		}
-		return errors.New("not registerd, may due to already have enough pservers")
+		return errors.New("not registered, may due to already have enough pservers")
 	}, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads))

 	if err != nil {
@@ -211,8 +211,5 @@ func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration) err
 	ctx, cancel := context.WithTimeout(context.Background(), timeout)
 	_, err := e.etcdClient.Put(ctx, key, string(value))
 	cancel()
-	if err != nil {
-		return err
-	}
-	return nil
+	return err
 }
diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go
index d6b7fafd59..151a3f8033 100644
--- a/go/pserver/optimizer.go
+++ b/go/pserver/optimizer.go
@@ -14,8 +14,6 @@ import (
 	log "github.com/sirupsen/logrus"
 )

-var nullPtr = unsafe.Pointer(uintptr(0))
-
 type optimizer struct {
 	opt         *C.struct_paddle_optimizer
 	elementType ElementType
 }

 func cArrayToSlice(p unsafe.Pointer, len int) []byte {
-	if p == nullPtr {
+	if p == nil {
 		return nil
 	}

@@ -92,8 +90,8 @@ func (o *optimizer) UpdateParameter(g Gradient) error {
 }

 func (o *optimizer) Cleanup() {
-	if unsafe.Pointer(o.opt) != nullPtr {
+	if unsafe.Pointer(o.opt) != nil {
 		C.paddle_release_optimizer(o.opt)
-		o.opt = (*C.struct_paddle_optimizer)(nullPtr)
+		o.opt = (*C.struct_paddle_optimizer)(nil)
 	}
 }
diff --git a/go/pserver/service.go b/go/pserver/service.go
index 5cb0293b97..c723959d6b 100644
--- a/go/pserver/service.go
+++ b/go/pserver/service.go
@@ -211,7 +211,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
 	// learning optimization methods are stochastic in
 	// nature. This race condition is allowed deliberately
 	// to save the program from making a copy of the
-	// paramter content.
+	// parameter content.
 	parameter.Name = name
 	parameter.ElementType = opt.elementType
 	parameter.Content = opt.GetWeights()

From a89c7ffa94bc26a879b8978273219980648aaec4 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Tue, 18 Jul 2017 11:57:31 +0800
Subject: [PATCH 44/50] Make Tensor <--> Numpy interactive in tensor.h

* Follow review comments to separate the Tensor/Numpy interactive methods
  into tensor.h.
* Simplify the logic of `CastToPyBufferImpl`: make it one struct in the
  details namespace.
* Remove the `Scope` exposure in Python, since it is currently useless.
* Remove some debug functions.

---
 paddle/pybind/pybind.cc                      | 118 +-----------------
 paddle/pybind/tensor.h                       |  91 ++++++++++++++
 .../paddle/v2/framework/tests/CMakeLists.txt |   3 +-
 3 files changed, 97 insertions(+), 115 deletions(-)
 create mode 100644 paddle/pybind/tensor.h

diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 8222323e36..e3dc3e718c 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -15,6 +15,7 @@ limitations under the License.
*/ #include #include #include +#include #include #include #include @@ -26,125 +27,14 @@ namespace pd = paddle::framework; USE_OP(add_two); -struct PlaceDebugString : public boost::static_visitor { - std::string operator()(const paddle::platform::GPUPlace& place) const { - return "GPU(" + std::to_string(place.device) + ")"; - } - - std::string operator()(const paddle::platform::CPUPlace& place) const { - return "CPU"; - } -}; - -template -struct TensorToPyBuffer { - pd::Tensor& self_; - explicit TensorToPyBuffer(pd::Tensor& self) : self_(self) {} - - bool CanCast() const { return std::type_index(typeid(T)) == self_.type(); } - - py::buffer_info Cast() const { - auto dim_vec = pd::vectorize(self_.dims()); - std::vector dims_outside; - std::vector strides; - dims_outside.resize(dim_vec.size()); - strides.resize(dim_vec.size()); - - size_t prod = 1; - for (size_t i = dim_vec.size(); i != 0; --i) { - dims_outside[i - 1] = (size_t)dim_vec[i - 1]; - strides[i - 1] = sizeof(float) * prod; - prod *= dims_outside[i - 1]; - } - - return py::buffer_info(self_.mutable_data(self_.place()), - sizeof(T), - py::format_descriptor::format(), - (size_t)pd::arity(self_.dims()), - dims_outside, - strides); - } -}; - -template -struct CastToPyBufferImpl; - -template -struct CastToPyBufferImpl { - py::buffer_info operator()(pd::Tensor& tensor) { - PADDLE_THROW("This type of tensor cannot be expose to Python"); - return py::buffer_info(); - } -}; - -template -struct CastToPyBufferImpl { - using CUR_TYPE = typename std::tuple_element>::type; - py::buffer_info operator()(pd::Tensor& tensor) { - TensorToPyBuffer cast_object(tensor); - if (cast_object.CanCast()) { - return cast_object.Cast(); - } else { - constexpr bool less = I + 1 < std::tuple_size>::value; - return CastToPyBufferImpl()(tensor); - } - } -}; - -template -std::ostream& operator<<(std::ostream& os, const std::vector& vec) { - for (size_t i = 0; i < vec.size(); ++i) { - os << vec[i]; - if (i + 1 != vec.size()) { - os << ", "; - } - } - return os; -} - -py::buffer_info CastToPyBuffer(pd::Tensor& tensor) { - auto buffer_info = CastToPyBufferImpl()(tensor); - return buffer_info; -} - -template -void PyTensorSet( - pd::Tensor& self, - py::array_t array) { - std::vector dims; - dims.reserve(array.ndim()); - for (size_t i = 0; i < array.ndim(); ++i) { - dims.push_back((int)array.shape()[i]); - } - - self.set_dims(pd::make_ddim(dims)); - auto* dst = self.mutable_data(paddle::platform::CPUPlace()); - std::memcpy(dst, array.data(), sizeof(T) * array.size()); -} - PYBIND11_PLUGIN(core) { py::module m("core", "C++ core of Paddle Paddle"); - py::class_( - m, "Place", R"DOC(Device Place Class.)DOC") - .def("__str__", - [](const paddle::platform::Place& self) { - return boost::apply_visitor(PlaceDebugString(), self); - }) - .def("is_gpu", - [](const paddle::platform::Place& self) { - return paddle::platform::is_gpu_place(self); - }) - .def("is_cpu", [](const paddle::platform::Place& self) { - return paddle::platform::is_cpu_place(self); - }); - py::class_(m, "Tensor", py::buffer_protocol()) - .def("get_place", &pd::Tensor::place) .def_buffer([](pd::Tensor& self) -> py::buffer_info { PADDLE_ENFORCE(paddle::platform::is_cpu_place(self.place()), "Only CPU tensor can cast to numpy array"); - return CastToPyBuffer(self); + return paddle::pybind::CastToPyBuffer(self); }) .def("get_dims", [](const pd::Tensor& self) { return pd::vectorize(self.dims()); }) @@ -160,8 +50,8 @@ PYBIND11_PLUGIN(core) { [](pd::Tensor& self) { self.mutable_data(paddle::platform::CPUPlace()); }) - 
.def("set", PyTensorSet) - .def("set", PyTensorSet); + .def("set", paddle::pybind::PyTensorSetFromArray) + .def("set", paddle::pybind::PyTensorSetFromArray); py::class_(m, "Variable", R"DOC(Variable Class. diff --git a/paddle/pybind/tensor.h b/paddle/pybind/tensor.h new file mode 100644 index 0000000000..ef07144ad4 --- /dev/null +++ b/paddle/pybind/tensor.h @@ -0,0 +1,91 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include +#include + +namespace py = pybind11; + +namespace paddle { + +namespace pybind { + +namespace details { + +template +struct CastToPyBufferImpl; + +template +struct CastToPyBufferImpl { + py::buffer_info operator()(framework::Tensor &tensor) { + PADDLE_THROW("This type of tensor cannot be expose to Python"); + return py::buffer_info(); + } +}; + +template +struct CastToPyBufferImpl { + using CUR_TYPE = typename std::tuple_element>::type; + py::buffer_info operator()(framework::Tensor &tensor) { + if (std::type_index(typeid(CUR_TYPE)) == tensor.type()) { + auto dim_vec = framework::vectorize(tensor.dims()); + std::vector dims_outside; + std::vector strides; + dims_outside.resize(dim_vec.size()); + strides.resize(dim_vec.size()); + + size_t prod = 1; + for (size_t i = dim_vec.size(); i != 0; --i) { + dims_outside[i - 1] = (size_t)dim_vec[i - 1]; + strides[i - 1] = sizeof(CUR_TYPE) * prod; + prod *= dims_outside[i - 1]; + } + + return py::buffer_info(tensor.mutable_data(tensor.place()), + sizeof(CUR_TYPE), + py::format_descriptor::format(), + (size_t)framework::arity(tensor.dims()), + dims_outside, + strides); + } else { + constexpr bool less = I + 1 < std::tuple_size>::value; + return CastToPyBufferImpl()(tensor); + } + } +}; +} // namespace details +inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) { + auto buffer_info = details::CastToPyBufferImpl()(tensor); + return buffer_info; +} + +template +void PyTensorSetFromArray( + framework::Tensor &self, + py::array_t array) { + std::vector dims; + dims.reserve(array.ndim()); + for (size_t i = 0; i < array.ndim(); ++i) { + dims.push_back((int)array.shape()[i]); + } + + self.set_dims(framework::make_ddim(dims)); + auto *dst = self.mutable_data(paddle::platform::CPUPlace()); + std::memcpy(dst, array.data(), sizeof(T) * array.size()); +} + +} // namespace pybind +} // namespace paddle diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 86fc60f26a..4ce2bef6fc 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -1,2 +1,3 @@ add_python_test(test_framework test_protobuf.py test_scope.py - test_default_scope_funcs.py test_op_creation_methods.py) + test_default_scope_funcs.py test_op_creation_methods.py + test_tensor.py) From 051676a7e483b59583d92cd49aff6bdace916dc4 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Tue, 18 Jul 2017 12:57:38 +0800 Subject: [PATCH 45/50] support multiple template parameter in 
KernelType for REGISTER_OP_XPU_KERNEL (#2932)
---
 paddle/framework/op_registry.h    | 14 ++++++++------
 paddle/framework/operator_test.cc |  4 +++-
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index 7aa59f0b63..48f77a6784 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -311,7 +311,7 @@ class OpRegisterHelper {
 /**
  * Macro to Register OperatorKernel.
  */
-#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, KernelType)      \
+#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, ...)             \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                                         \
       __reg_op_kernel_##type##_##DEVICE_TYPE##__,                         \
       "REGISTER_OP_KERNEL must be in global namespace");                  \
@@ -320,17 +320,19 @@
       ::paddle::framework::OperatorWithKernel::OpKernelKey key;           \
       key.place_ = PlaceType();                                           \
       ::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \
-          .reset(new KernelType());                                       \
+          .reset(new __VA_ARGS__());                                      \
     }                                                                     \
   };                                                                      \
   static __op_kernel_register__##type##__ __reg_kernel_##type##__;        \
   int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; }

-#define REGISTER_OP_GPU_KERNEL(type, KernelType) \
-  REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, KernelType)
+// (type, KernelType)
+#define REGISTER_OP_GPU_KERNEL(type, ...) \
+  REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__)

-#define REGISTER_OP_CPU_KERNEL(type, KernelType) \
-  REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, KernelType)
+// (type, KernelType)
+#define REGISTER_OP_CPU_KERNEL(type, ...) \
+  REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)

 /**
  * Macro to mark what Operator and Kernel we will use and tell the compiler to
diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc
index 6fa110f94c..8e55d0111f 100644
--- a/paddle/framework/operator_test.cc
+++ b/paddle/framework/operator_test.cc
@@ -102,6 +102,7 @@ class OpWithKernelTest : public OperatorWithKernel {
                         const std::vector& outputs) const override {}
 };

+template
 class CPUKernelTest : public OpKernel {
  public:
   void Compute(const KernelContext& ctx) const {
@@ -171,7 +172,8 @@ class CPUKernalMultiInputsTest : public OpKernel {

 REGISTER_OP(op_with_kernel, paddle::framework::OpWithKernelTest,
             paddle::framework::OpKernelTestProtoAndCheckerMaker);
-REGISTER_OP_CPU_KERNEL(op_with_kernel, paddle::framework::CPUKernelTest);
+REGISTER_OP_CPU_KERNEL(op_with_kernel,
+                       paddle::framework::CPUKernelTest);

 // test with single input
 TEST(OpKernel, all) {

From e00aae5386e4c2e9bef0e942f3be4405874c4f1c Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Tue, 18 Jul 2017 13:02:22 +0800
Subject: [PATCH 46/50] Fix Merge Bugs

* There was a merge conflict when merging PR #2914.
* Develop and PR #2914 both add a `DDim::size` method, but this did not
  trigger a git merge conflict during the merge.
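
One note on the `__VA_ARGS__` change in the patch above: the C preprocessor splits macro arguments at every top-level comma and knows nothing about template angle brackets, so a kernel type like `CPUKernelTest<float, float>` reaches a non-variadic macro as two broken arguments. Declaring the trailing parameter variadic lets `__VA_ARGS__` stitch the pieces back together. The following standalone sketch demonstrates the mechanism; the REGISTER macro and DemoKernel are hypothetical stand-ins, not the real registry code.

#include <iostream>
#include <string>

// Reduced model of REGISTER_OP_CPU_KERNEL: the kernel type is received as
// __VA_ARGS__, so commas inside its template argument list survive.
#define REGISTER(name, ...)                                       \
  static __VA_ARGS__ name##_instance;                             \
  static int name##_touch = (name##_instance.Announce(#name), 0);

template <typename T1, typename T2>
struct DemoKernel {
  void Announce(const std::string& op) const {
    std::cout << "registered kernel for " << op << "\n";
  }
};

// The preprocessor splits this call into three pieces: `demo_op`,
// `DemoKernel<float`, and ` float>`; __VA_ARGS__ rejoins the last two.
REGISTER(demo_op, DemoKernel<float, float>)

int main() { return 0; }

This is why REGISTER_OP_CPU_KERNEL can now accept kernels with any number of template parameters without requiring extra parentheses at the call site.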
---
 paddle/framework/ddim.cc | 2 --
 paddle/framework/ddim.h  | 2 --
 2 files changed, 4 deletions(-)

diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc
index d9d5e9f66e..d2ef85afe5 100644
--- a/paddle/framework/ddim.cc
+++ b/paddle/framework/ddim.cc
@@ -280,8 +280,6 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
   return os;
 }

-ssize_t DDim::size() const { return arity(*this); }
-
 DDim::DDim(std::initializer_list init_list) {
   *this = make_ddim(init_list);
 }
diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h
index 0d10745a9e..3976c6c029 100644
--- a/paddle/framework/ddim.h
+++ b/paddle/framework/ddim.h
@@ -52,8 +52,6 @@ struct DDim {

   DDimVar getVar() { return var; }

-  ssize_t size() const;
-
   bool operator==(DDim d) const;

   bool operator!=(DDim d) const;

From 1ac0bffaf7d52178b037f4506c3d60d5d49241f4 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Tue, 18 Jul 2017 13:11:06 +0800
Subject: [PATCH 47/50] Override Net::DebugString to invoke inner ops_

Net::DebugString() prints what the net is and invokes each inner
operator's DebugString() with a '\t' prefix.

---
 paddle/framework/net.cc     | 9 +++++++++
 paddle/framework/net.h      | 2 ++
 paddle/framework/operator.h | 2 +-
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/paddle/framework/net.cc b/paddle/framework/net.cc
index 7311cda9a9..b9cd732d40 100644
--- a/paddle/framework/net.cc
+++ b/paddle/framework/net.cc
@@ -55,5 +55,14 @@ void PlainNet::CompleteAddOp() {
   add_op_done_ = true;
 }

+std::string PlainNet::DebugString() const {
+  std::ostringstream os;
+  os << this->type_ << ":" << std::endl;
+  for (auto& op : ops_) {
+    os << "\t" << op->DebugString() << std::endl;
+  }
+  return os.str();
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/net.h b/paddle/framework/net.h
index 19a1620e29..33bb30ea07 100644
--- a/paddle/framework/net.h
+++ b/paddle/framework/net.h
@@ -88,6 +88,8 @@ class PlainNet : public Net {

   void CompleteAddOp() override;

+  std::string DebugString() const override;
+
   std::vector ops_;

  private:
diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index f7ed6e9f3d..b62cac6d27 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -48,7 +48,7 @@ class OperatorBase {
     return boost::get(attrs_.at(name));
   }

-  std::string DebugString() const;
+  virtual std::string DebugString() const;

   /// Init will be called after CreateOperator, you can put some initialization
   /// logic here.

From c1219a530c4641ec618e15c8f4e5a66ec0f637e8 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Tue, 18 Jul 2017 13:54:24 +0800
Subject: [PATCH 48/50] Change `in_out_idxs_` to shared_ptr

* `in_out_idxs_` is shared between all operator instances of the same
  operator type.

---
 paddle/framework/op_registry.h | 33 +++++++++++++++++++++++++--------
 paddle/framework/operator.cc   | 26 ++++++++------------------
 paddle/framework/operator.h    |  5 +----
 3 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index 48f77a6784..491ee21eec 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -198,6 +198,7 @@ Add a mark to which output is temporary is helpful for future optimization.
class OpRegistry { using OpCreator = std::function; + using VarIndexMap = std::unordered_map; public: template @@ -212,6 +213,17 @@ class OpRegistry { op_proto.IsInitialized(), "Fail to initialize %s's OpProto, because %s is not initialized", op_type, op_proto.InitializationErrorString()); + + VarIndexMaps()[op_type].reset(new VarIndexMap()); + auto& varmap = *VarIndexMaps()[op_type]; + int idx = 0; + for (auto& var : op_proto.inputs()) { + varmap[var.name()] = idx++; + } + idx = 0; + for (auto& var : op_proto.outputs()) { + varmap[var.name()] = idx++; + } } static OperatorPtr CreateOp(const OpDesc& op_desc) { @@ -220,7 +232,6 @@ class OpRegistry { OperatorPtr op(creators().at(op_type)()); //! Fill op's data member. Not use constructor because it will be noising //! for Op developer. - const OpProto& op_proto = protos().at(op_type); op->type_ = op_desc.type(); // set op's inputs_ from desc. op->inputs_.reserve((size_t)op_desc.inputs_size()); @@ -240,25 +251,31 @@ class OpRegistry { //! Convert Temporary variable name to an unique variable name. GenerateTempVariableName(op.get()); - // set argument offsets stored in op. - CreateInOutOffsetMap(op, op_proto); + //! set argument offsets stored in op. + { + auto var_index_it = VarIndexMaps().find(op_type); + if (var_index_it != VarIndexMaps().end()) { + op->in_out_idxs_ = var_index_it->second; + } + } //! Other op's custom Init for a complex Op. For simple Op, the Init //! method do nothing. op->Init(); return op; } - // init op.in_out_idxs_ to accelerate argument's offset lookup. - static void CreateInOutOffsetMap(OperatorPtr op, const OpProto& proto) { - op->CreateInOutOffsetMap(proto); - } - static std::unordered_map& protos() { static std::unordered_map protos_; return protos_; }; private: + static std::unordered_map>& + VarIndexMaps() { + static std::unordered_map> maps_; + return maps_; + } + static void GenerateTempVariableName(OperatorBase* op) { static std::atomic gUniqId(0UL); for (auto& outname : op->outputs_) { diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 50cb2d9362..3647983053 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -19,21 +19,10 @@ limitations under the License. 
*/ namespace paddle { namespace framework { -void OperatorBase::CreateInOutOffsetMap(const OpProto& proto) { - PADDLE_ENFORCE(in_out_idxs_.empty(), "duplicate call CreateInOutOffsetMap"); - for (int i = 0; i < proto.inputs_size(); i++) { - const auto& name = proto.inputs()[i].name(); - in_out_idxs_[name] = i; - } - for (int i = 0; i < proto.outputs_size(); i++) { - const auto& name = proto.outputs()[i].name(); - in_out_idxs_[name] = i; - } -} - const std::string& OperatorBase::Input(const std::string& name) const { - auto it = in_out_idxs_.find(name); - PADDLE_ENFORCE(it != in_out_idxs_.end(), "no key [%s] in in_out_idxs_", name); + auto it = in_out_idxs_->find(name); + PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", + name); if (attrs_.count("input_format") == 0) { return inputs_[it->second]; @@ -46,7 +35,7 @@ const std::string& OperatorBase::Input(const std::string& name) const { std::vector OperatorBase::Inputs(const std::string& name) const { auto input_format = GetAttr>("input_format"); - auto offset = in_out_idxs_.at(name); + auto offset = in_out_idxs_->at(name); return std::vector{ inputs_.begin() + input_format.at(offset), @@ -54,8 +43,9 @@ std::vector OperatorBase::Inputs(const std::string& name) const { } const std::string& OperatorBase::Output(const std::string& name) const { - auto it = in_out_idxs_.find(name); - PADDLE_ENFORCE(it != in_out_idxs_.end(), "no key [%s] in in_out_idxs_", name); + auto it = in_out_idxs_->find(name); + PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", + name); if (attrs_.count("output_format") == 0) { return outputs_[it->second]; @@ -68,7 +58,7 @@ const std::string& OperatorBase::Output(const std::string& name) const { std::vector OperatorBase::Outputs(const std::string& name) const { auto output_format = GetAttr>("output_format"); - auto offset = in_out_idxs_.at(name); + auto offset = in_out_idxs_->at(name); return std::vector{ outputs_.begin() + output_format.at(offset), diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 2fe9670677..2081b8a05c 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -82,16 +82,13 @@ class OperatorBase { // TODO add a vector_view to prevent memory copy. std::vector Outputs(const std::string& name) const; - // init in_out_idxs_ to accelerate argument's offset lookup. - void CreateInOutOffsetMap(const OpProto& proto); - public: std::string type_; std::vector inputs_; std::vector outputs_; AttributeMap attrs_; // store the arguments' offset described in op_desc. - std::unordered_map in_out_idxs_; + std::shared_ptr> in_out_idxs_; }; class KernelContext { From f6a51d9b4ef850ec650861de85ca1f3b55bfb4c8 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 18 Jul 2017 15:21:54 +0800 Subject: [PATCH 49/50] Make CreateOp in Plain C++ params --- paddle/framework/op_registry.h | 64 ++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 491ee21eec..c41fe10729 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -199,6 +199,7 @@ Add a mark to which output is temporary is helpful for future optimization. class OpRegistry { using OpCreator = std::function; using VarIndexMap = std::unordered_map; + using VarNameList = std::vector; public: template @@ -226,42 +227,51 @@ class OpRegistry { } } - static OperatorPtr CreateOp(const OpDesc& op_desc) { - //! Create a OpPtr by type. 
- std::string op_type = op_desc.type(); - OperatorPtr op(creators().at(op_type)()); - //! Fill op's data member. Not use constructor because it will be noising - //! for Op developer. - op->type_ = op_desc.type(); - // set op's inputs_ from desc. - op->inputs_.reserve((size_t)op_desc.inputs_size()); - std::copy(op_desc.inputs().begin(), op_desc.inputs().end(), - std::back_inserter(op->inputs_)); - // set op's outputs_ from desc. - op->outputs_.reserve((size_t)op_desc.outputs_size()); - std::copy(op_desc.outputs().begin(), op_desc.outputs().end(), - std::back_inserter(op->outputs_)); + static OperatorPtr CreateOp(const std::string& type, + const VarNameList& inputs, + const VarNameList& outputs, + const AttributeMap& attrs) { + auto op_create_it = creators().find(type); + PADDLE_ENFORCE(op_create_it != creators().end(), + "Operator %s cannot be found", type); - //! Fill attrs, and validate attrs. - for (auto& attr : op_desc.attrs()) { - op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr); - } - op_checkers().at(op_type).Check(op->attrs_); + auto op = op_create_it->second(); + op->type_ = type; + op->inputs_ = inputs; + op->outputs_ = outputs; + op->attrs_ = attrs; + op_checkers().at(type).Check(op->attrs_); - //! Convert Temporary variable name to an unique variable name. - GenerateTempVariableName(op.get()); + GenerateTempVariableName(op); - //! set argument offsets stored in op. { - auto var_index_it = VarIndexMaps().find(op_type); + auto var_index_it = VarIndexMaps().find(type); if (var_index_it != VarIndexMaps().end()) { op->in_out_idxs_ = var_index_it->second; } } - //! Other op's custom Init for a complex Op. For simple Op, the Init - //! method do nothing. + op->Init(); - return op; + return OperatorPtr(op); + } + + static OperatorPtr CreateOp(const OpDesc& op_desc) { + std::vector inputs; + inputs.reserve((size_t)op_desc.inputs_size()); + std::copy(op_desc.inputs().begin(), op_desc.inputs().end(), + std::back_inserter(inputs)); + + std::vector outputs; + outputs.reserve((size_t)op_desc.outputs_size()); + std::copy(op_desc.outputs().begin(), op_desc.outputs().end(), + std::back_inserter(outputs)); + + AttributeMap attrs; + for (auto& attr : op_desc.attrs()) { + attrs[attr.name()] = AttrTypeHelper::GetAttrValue(attr); + } + + return CreateOp(op_desc.type(), inputs, outputs, attrs); } static std::unordered_map& protos() { From 1dc53a289fe724cd3772618de374aacbf72a87f6 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 18 Jul 2017 15:23:13 +0800 Subject: [PATCH 50/50] Use friend not to expose tensor's `type/place` --- paddle/framework/tensor.h | 14 +++++++++----- paddle/pybind/pybind.cc | 4 +--- paddle/pybind/{tensor.h => tensor_bind.h} | 18 +++++++++++------- 3 files changed, 21 insertions(+), 15 deletions(-) rename paddle/pybind/{tensor.h => tensor_bind.h} (84%) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 891cf73641..c495687dc4 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -24,6 +24,12 @@ limitations under the License. 
*/ #include "paddle/platform/place.h" namespace paddle { +namespace pybind { +namespace details { // forward declare +template +struct CastToPyBufferImpl; +} // namespace details +} // namespace pybind namespace framework { class Tensor { @@ -128,10 +134,6 @@ class Tensor { DDim dims() const { return dims_; } - platform::Place place() const { return holder_->place(); } - - std::type_index type() const { return holder_->type(); } - private: // Placeholder hides type T, so it doesn't appear as a template // parameter of Variable. @@ -186,7 +188,9 @@ class Tensor { DDim dims_; size_t numel_; // cache of `product(dims_)` size_t offset_; // marks the begin of tensor data area. -}; // namespace framework + template + friend struct paddle::pybind::details::CastToPyBufferImpl; +}; // namespace framework } // namespace framework } // namespace paddle diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index e3dc3e718c..0eef36f8ec 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include #include -#include +#include #include #include #include @@ -32,8 +32,6 @@ PYBIND11_PLUGIN(core) { py::class_(m, "Tensor", py::buffer_protocol()) .def_buffer([](pd::Tensor& self) -> py::buffer_info { - PADDLE_ENFORCE(paddle::platform::is_cpu_place(self.place()), - "Only CPU tensor can cast to numpy array"); return paddle::pybind::CastToPyBuffer(self); }) .def("get_dims", diff --git a/paddle/pybind/tensor.h b/paddle/pybind/tensor_bind.h similarity index 84% rename from paddle/pybind/tensor.h rename to paddle/pybind/tensor_bind.h index ef07144ad4..b96516643a 100644 --- a/paddle/pybind/tensor.h +++ b/paddle/pybind/tensor_bind.h @@ -40,7 +40,10 @@ template struct CastToPyBufferImpl { using CUR_TYPE = typename std::tuple_element>::type; py::buffer_info operator()(framework::Tensor &tensor) { - if (std::type_index(typeid(CUR_TYPE)) == tensor.type()) { + PADDLE_ENFORCE(paddle::platform::is_cpu_place(tensor.holder_->place()), + "Only CPU tensor can cast to numpy array"); + + if (std::type_index(typeid(CUR_TYPE)) == tensor.holder_->type()) { auto dim_vec = framework::vectorize(tensor.dims()); std::vector dims_outside; std::vector strides; @@ -54,12 +57,13 @@ struct CastToPyBufferImpl { prod *= dims_outside[i - 1]; } - return py::buffer_info(tensor.mutable_data(tensor.place()), - sizeof(CUR_TYPE), - py::format_descriptor::format(), - (size_t)framework::arity(tensor.dims()), - dims_outside, - strides); + return py::buffer_info( + tensor.mutable_data(tensor.holder_->place()), + sizeof(CUR_TYPE), + py::format_descriptor::format(), + (size_t)framework::arity(tensor.dims()), + dims_outside, + strides); } else { constexpr bool less = I + 1 < std::tuple_size>::value; return CastToPyBufferImpl()(tensor);