From bde90be71bc2758b464960c8e2631ee177c1d9a7 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Mon, 17 Jul 2017 18:10:18 +0800
Subject: [PATCH 1/3] Read/Write a Tensor Python

Basically following
http://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html

* Use buffer protocol to return a view of Tensor. It can be cast to
  numpy array in Python.
* Set a numpy array to a tensor.
---
 paddle/framework/tensor.h                    |   9 +-
 paddle/pybind/pybind.cc                      | 142 +++++++++++++++++-
 .../paddle/v2/framework/tests/test_tensor.py |  45 ++++++
 3 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 python/paddle/v2/framework/tests/test_tensor.py

diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 29bad7a00a..891cf73641 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include
 #include
 #include
+#include
 #include "paddle/framework/ddim.h"
 #include "paddle/framework/enforce.h"
 #include "paddle/memory/memory.h"
@@ -127,6 +128,10 @@ class Tensor {

   DDim dims() const { return dims_; }

+  platform::Place place() const { return holder_->place(); }
+
+  std::type_index type() const { return holder_->type(); }
+
  private:
   // Placeholder hides type T, so it doesn't appear as a template
   // parameter of Variable.
@@ -135,6 +140,7 @@ class Tensor {
     virtual void* ptr() const = 0;
     virtual platform::Place place() const = 0;
     virtual size_t size() const = 0;
+    virtual std::type_index type() const = 0;
   };

   template <typename T, typename PlaceType>
@@ -159,7 +165,8 @@ class Tensor {
     virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
     virtual size_t size() const { return size_; }

-    virtual platform::Place place() const { return place_; }
+    virtual paddle::platform::Place place() const { return place_; }
+    virtual std::type_index type() const { return std::type_index(typeid(T)); }

     std::unique_ptr<T, Deleter<PlaceType>> ptr_;
     platform::Place place_;  // record the place of ptr_.

diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index b5ead21fd0..8222323e36 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -15,6 +15,7 @@ limitations under the License. */
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -25,9 +26,143 @@ namespace pd = paddle::framework;

 USE_OP(add_two);

+struct PlaceDebugString : public boost::static_visitor<std::string> {
+  std::string operator()(const paddle::platform::GPUPlace& place) const {
+    return "GPU(" + std::to_string(place.device) + ")";
+  }
+
+  std::string operator()(const paddle::platform::CPUPlace& place) const {
+    return "CPU";
+  }
+};
+
+template <typename T>
+struct TensorToPyBuffer {
+  pd::Tensor& self_;
+  explicit TensorToPyBuffer(pd::Tensor& self) : self_(self) {}
+
+  bool CanCast() const { return std::type_index(typeid(T)) == self_.type(); }
+
+  py::buffer_info Cast() const {
+    auto dim_vec = pd::vectorize(self_.dims());
+    std::vector<size_t> dims_outside;
+    std::vector<size_t> strides;
+    dims_outside.resize(dim_vec.size());
+    strides.resize(dim_vec.size());
+
+    size_t prod = 1;
+    for (size_t i = dim_vec.size(); i != 0; --i) {
+      dims_outside[i - 1] = (size_t)dim_vec[i - 1];
+      strides[i - 1] = sizeof(float) * prod;
+      prod *= dims_outside[i - 1];
+    }
+
+    return py::buffer_info(self_.mutable_data<T>(self_.place()),
+                           sizeof(T),
+                           py::format_descriptor<T>::format(),
+                           (size_t)pd::arity(self_.dims()),
+                           dims_outside,
+                           strides);
+  }
+};
+
+template <bool less, size_t I, typename... ARGS>
+struct CastToPyBufferImpl;
+
+template <size_t I, typename... ARGS>
+struct CastToPyBufferImpl<false, I, ARGS...> {
+  py::buffer_info operator()(pd::Tensor& tensor) {
+    PADDLE_THROW("This type of tensor cannot be expose to Python");
+    return py::buffer_info();
+  }
+};
+
+template <size_t I, typename... ARGS>
+struct CastToPyBufferImpl<true, I, ARGS...> {
+  using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type;
+  py::buffer_info operator()(pd::Tensor& tensor) {
+    TensorToPyBuffer<CUR_TYPE> cast_object(tensor);
+    if (cast_object.CanCast()) {
+      return cast_object.Cast();
+    } else {
+      constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
+      return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
+    }
+  }
+};
+
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const std::vector<T>& vec) {
+  for (size_t i = 0; i < vec.size(); ++i) {
+    os << vec[i];
+    if (i + 1 != vec.size()) {
+      os << ", ";
+    }
+  }
+  return os;
+}
+
+py::buffer_info CastToPyBuffer(pd::Tensor& tensor) {
+  auto buffer_info = CastToPyBufferImpl<true, 0, float, int>()(tensor);
+  return buffer_info;
+}
+
+template <typename T>
+void PyTensorSet(
+    pd::Tensor& self,
+    py::array_t<T, py::array::c_style | py::array::forcecast> array) {
+  std::vector<int> dims;
+  dims.reserve(array.ndim());
+  for (size_t i = 0; i < array.ndim(); ++i) {
+    dims.push_back((int)array.shape()[i]);
+  }
+
+  self.set_dims(pd::make_ddim(dims));
+  auto* dst = self.mutable_data<T>(paddle::platform::CPUPlace());
+  std::memcpy(dst, array.data(), sizeof(T) * array.size());
+}
+
 PYBIND11_PLUGIN(core) {
   py::module m("core", "C++ core of Paddle Paddle");

+  py::class_<paddle::platform::Place>(
+      m, "Place", R"DOC(Device Place Class.)DOC")
+      .def("__str__",
+           [](const paddle::platform::Place& self) {
+             return boost::apply_visitor(PlaceDebugString(), self);
+           })
+      .def("is_gpu",
+           [](const paddle::platform::Place& self) {
+             return paddle::platform::is_gpu_place(self);
+           })
+      .def("is_cpu", [](const paddle::platform::Place& self) {
+        return paddle::platform::is_cpu_place(self);
+      });
+
+  py::class_<pd::Tensor>(m, "Tensor", py::buffer_protocol())
+      .def("get_place", &pd::Tensor::place)
+      .def_buffer([](pd::Tensor& self) -> py::buffer_info {
+        PADDLE_ENFORCE(paddle::platform::is_cpu_place(self.place()),
+                       "Only CPU tensor can cast to numpy array");
+        return CastToPyBuffer(self);
+      })
+      .def("get_dims",
+           [](const pd::Tensor& self) { return pd::vectorize(self.dims()); })
+      .def("set_dims",
+           [](pd::Tensor& self, const std::vector<int>& dim) {
+             self.set_dims(pd::make_ddim(dim));
+           })
+      .def("alloc_float",
+           [](pd::Tensor& self) {
+             self.mutable_data<float>(paddle::platform::CPUPlace());
+           })
+      .def("alloc_int",
+           [](pd::Tensor& self) {
+             self.mutable_data<int>(paddle::platform::CPUPlace());
+           })
+      .def("set", PyTensorSet<float>)
+      .def("set", PyTensorSet<int>);
+
   py::class_<pd::Variable>(m, "Variable", R"DOC(Variable Class.

 All parameter, weight, gradient are variables in Paddle.
@@ -38,7 +173,12 @@ All parameter, weight, gradient are variables in Paddle.
              *var.GetMutable<int>() = val;
            })
       .def("get_int",
-           [](const pd::Variable& var) -> int { return var.Get<int>(); });
+           [](const pd::Variable& var) -> int { return var.Get<int>(); })
+      .def("get_tensor",
+           [](pd::Variable& self) -> pd::Tensor* {
+             return self.GetMutable<pd::Tensor>();
+           },
+           py::return_value_policy::reference);

   py::class_<pd::Scope, std::shared_ptr<pd::Scope>>(m, "Scope")
       .def(py::init<const std::shared_ptr<pd::Scope>&>())

diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/framework/tests/test_tensor.py
new file mode 100644
index 0000000000..b72aff3b9c
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_tensor.py
@@ -0,0 +1,45 @@
+import paddle.v2.framework.core as core
+import unittest
+import numpy
+
+
+class TestScope(unittest.TestCase):
+    def test_int_tensor(self):
+        scope = core.Scope(None)
+        var = scope.create_var("test_tensor")
+        tensor = var.get_tensor()
+
+        tensor.set_dims([1000, 784])
+        tensor.alloc_int()
+
+        tensor_array = numpy.array(tensor)
+        self.assertEqual((1000, 784), tensor_array.shape)
+        tensor_array[3, 9] = 1
+        tensor_array[19, 11] = 2
+        tensor.set(tensor_array)
+
+        tensor_array_2 = numpy.array(tensor)
+        self.assertEqual(1.0, tensor_array_2[3, 9])
+        self.assertEqual(2.0, tensor_array_2[19, 11])
+
+    def test_float_tensor(self):
+        scope = core.Scope(None)
+        var = scope.create_var("test_tensor")
+        tensor = var.get_tensor()
+
+        tensor.set_dims([1000, 784])
+        tensor.alloc_float()
+
+        tensor_array = numpy.array(tensor)
+        self.assertEqual((1000, 784), tensor_array.shape)
+        tensor_array[3, 9] = 1.0
+        tensor_array[19, 11] = 2.0
+        tensor.set(tensor_array)
+
+        tensor_array_2 = numpy.array(tensor)
+        self.assertAlmostEqual(1.0, tensor_array_2[3, 9])
+        self.assertAlmostEqual(2.0, tensor_array_2[19, 11])
+
+
+if __name__ == '__main__':
+    unittest.main()

From a89c7ffa94bc26a879b8978273219980648aaec4 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Tue, 18 Jul 2017 11:57:31 +0800
Subject: [PATCH 2/3] Make Tensor <--> Numpy interactive in tensor.h

* Follow review comments to seperate Tensor Numpy interactive methods in
  tensor.h.
* Simplify logic for `CastToPyBufferImpl`, make it as one struct and in
  details namespace.
* Remove `Scope` expose in Python, since it currently is useless.
* Remove some debug functions.
---
 paddle/pybind/pybind.cc                       | 118 +-----------------
 paddle/pybind/tensor.h                        |  91 ++++++++++++++
 .../paddle/v2/framework/tests/CMakeLists.txt  |   3 +-
 3 files changed, 97 insertions(+), 115 deletions(-)
 create mode 100644 paddle/pybind/tensor.h

diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 8222323e36..e3dc3e718c 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -15,6 +15,7 @@ limitations under the License. */
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -26,125 +27,14 @@ namespace pd = paddle::framework;

 USE_OP(add_two);

-struct PlaceDebugString : public boost::static_visitor<std::string> {
-  std::string operator()(const paddle::platform::GPUPlace& place) const {
-    return "GPU(" + std::to_string(place.device) + ")";
-  }
-
-  std::string operator()(const paddle::platform::CPUPlace& place) const {
-    return "CPU";
-  }
-};
-
-template <typename T>
-struct TensorToPyBuffer {
-  pd::Tensor& self_;
-  explicit TensorToPyBuffer(pd::Tensor& self) : self_(self) {}
-
-  bool CanCast() const { return std::type_index(typeid(T)) == self_.type(); }
-
-  py::buffer_info Cast() const {
-    auto dim_vec = pd::vectorize(self_.dims());
-    std::vector<size_t> dims_outside;
-    std::vector<size_t> strides;
-    dims_outside.resize(dim_vec.size());
-    strides.resize(dim_vec.size());
-
-    size_t prod = 1;
-    for (size_t i = dim_vec.size(); i != 0; --i) {
-      dims_outside[i - 1] = (size_t)dim_vec[i - 1];
-      strides[i - 1] = sizeof(float) * prod;
-      prod *= dims_outside[i - 1];
-    }
-
-    return py::buffer_info(self_.mutable_data<T>(self_.place()),
-                           sizeof(T),
-                           py::format_descriptor<T>::format(),
-                           (size_t)pd::arity(self_.dims()),
-                           dims_outside,
-                           strides);
-  }
-};
-
-template <bool less, size_t I, typename... ARGS>
-struct CastToPyBufferImpl;
-
-template <size_t I, typename... ARGS>
-struct CastToPyBufferImpl<false, I, ARGS...> {
-  py::buffer_info operator()(pd::Tensor& tensor) {
-    PADDLE_THROW("This type of tensor cannot be expose to Python");
-    return py::buffer_info();
-  }
-};
-
-template <size_t I, typename... ARGS>
-struct CastToPyBufferImpl<true, I, ARGS...> {
-  using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type;
-  py::buffer_info operator()(pd::Tensor& tensor) {
-    TensorToPyBuffer<CUR_TYPE> cast_object(tensor);
-    if (cast_object.CanCast()) {
-      return cast_object.Cast();
-    } else {
-      constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
-      return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
-    }
-  }
-};
-
-template <typename T>
-std::ostream& operator<<(std::ostream& os, const std::vector<T>& vec) {
-  for (size_t i = 0; i < vec.size(); ++i) {
-    os << vec[i];
-    if (i + 1 != vec.size()) {
-      os << ", ";
-    }
-  }
-  return os;
-}
-
-py::buffer_info CastToPyBuffer(pd::Tensor& tensor) {
-  auto buffer_info = CastToPyBufferImpl<true, 0, float, int>()(tensor);
-  return buffer_info;
-}
-
-template <typename T>
-void PyTensorSet(
-    pd::Tensor& self,
-    py::array_t<T, py::array::c_style | py::array::forcecast> array) {
-  std::vector<int> dims;
-  dims.reserve(array.ndim());
-  for (size_t i = 0; i < array.ndim(); ++i) {
-    dims.push_back((int)array.shape()[i]);
-  }
-
-  self.set_dims(pd::make_ddim(dims));
-  auto* dst = self.mutable_data<T>(paddle::platform::CPUPlace());
-  std::memcpy(dst, array.data(), sizeof(T) * array.size());
-}
-
 PYBIND11_PLUGIN(core) {
   py::module m("core", "C++ core of Paddle Paddle");

-  py::class_<paddle::platform::Place>(
-      m, "Place", R"DOC(Device Place Class.)DOC")
-      .def("__str__",
-           [](const paddle::platform::Place& self) {
-             return boost::apply_visitor(PlaceDebugString(), self);
-           })
-      .def("is_gpu",
-           [](const paddle::platform::Place& self) {
-             return paddle::platform::is_gpu_place(self);
-           })
-      .def("is_cpu", [](const paddle::platform::Place& self) {
-        return paddle::platform::is_cpu_place(self);
-      });
-
   py::class_<pd::Tensor>(m, "Tensor", py::buffer_protocol())
-      .def("get_place", &pd::Tensor::place)
       .def_buffer([](pd::Tensor& self) -> py::buffer_info {
         PADDLE_ENFORCE(paddle::platform::is_cpu_place(self.place()),
                        "Only CPU tensor can cast to numpy array");
-        return CastToPyBuffer(self);
+        return paddle::pybind::CastToPyBuffer(self);
       })
       .def("get_dims",
            [](const pd::Tensor& self) { return pd::vectorize(self.dims()); })
@@ -160,8 +50,8 @@ PYBIND11_PLUGIN(core) {
            [](pd::Tensor& self) {
             self.mutable_data<int>(paddle::platform::CPUPlace());
           })
.def("set", PyTensorSet) - .def("set", PyTensorSet); + .def("set", paddle::pybind::PyTensorSetFromArray) + .def("set", paddle::pybind::PyTensorSetFromArray); py::class_(m, "Variable", R"DOC(Variable Class. diff --git a/paddle/pybind/tensor.h b/paddle/pybind/tensor.h new file mode 100644 index 0000000000..ef07144ad4 --- /dev/null +++ b/paddle/pybind/tensor.h @@ -0,0 +1,91 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include +#include + +namespace py = pybind11; + +namespace paddle { + +namespace pybind { + +namespace details { + +template +struct CastToPyBufferImpl; + +template +struct CastToPyBufferImpl { + py::buffer_info operator()(framework::Tensor &tensor) { + PADDLE_THROW("This type of tensor cannot be expose to Python"); + return py::buffer_info(); + } +}; + +template +struct CastToPyBufferImpl { + using CUR_TYPE = typename std::tuple_element>::type; + py::buffer_info operator()(framework::Tensor &tensor) { + if (std::type_index(typeid(CUR_TYPE)) == tensor.type()) { + auto dim_vec = framework::vectorize(tensor.dims()); + std::vector dims_outside; + std::vector strides; + dims_outside.resize(dim_vec.size()); + strides.resize(dim_vec.size()); + + size_t prod = 1; + for (size_t i = dim_vec.size(); i != 0; --i) { + dims_outside[i - 1] = (size_t)dim_vec[i - 1]; + strides[i - 1] = sizeof(CUR_TYPE) * prod; + prod *= dims_outside[i - 1]; + } + + return py::buffer_info(tensor.mutable_data(tensor.place()), + sizeof(CUR_TYPE), + py::format_descriptor::format(), + (size_t)framework::arity(tensor.dims()), + dims_outside, + strides); + } else { + constexpr bool less = I + 1 < std::tuple_size>::value; + return CastToPyBufferImpl()(tensor); + } + } +}; +} // namespace details +inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) { + auto buffer_info = details::CastToPyBufferImpl()(tensor); + return buffer_info; +} + +template +void PyTensorSetFromArray( + framework::Tensor &self, + py::array_t array) { + std::vector dims; + dims.reserve(array.ndim()); + for (size_t i = 0; i < array.ndim(); ++i) { + dims.push_back((int)array.shape()[i]); + } + + self.set_dims(framework::make_ddim(dims)); + auto *dst = self.mutable_data(paddle::platform::CPUPlace()); + std::memcpy(dst, array.data(), sizeof(T) * array.size()); +} + +} // namespace pybind +} // namespace paddle diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 86fc60f26a..4ce2bef6fc 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -1,2 +1,3 @@ add_python_test(test_framework test_protobuf.py test_scope.py - test_default_scope_funcs.py test_op_creation_methods.py) + test_default_scope_funcs.py test_op_creation_methods.py + test_tensor.py) From 1dc53a289fe724cd3772618de374aacbf72a87f6 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 18 Jul 2017 15:23:13 +0800 Subject: [PATCH 3/3] Use friend not to expose tensor's `type/place` 
---
 paddle/framework/tensor.h                 | 14 +++++++++-----
 paddle/pybind/pybind.cc                   |  4 +---
 paddle/pybind/{tensor.h => tensor_bind.h} | 18 +++++++++++-------
 3 files changed, 21 insertions(+), 15 deletions(-)
 rename paddle/pybind/{tensor.h => tensor_bind.h} (84%)

diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 891cf73641..c495687dc4 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -24,6 +24,12 @@ limitations under the License. */
 #include "paddle/platform/place.h"

 namespace paddle {
+namespace pybind {
+namespace details {  // forward declare
+template <bool less, size_t I, typename... ARGS>
+struct CastToPyBufferImpl;
+}  // namespace details
+}  // namespace pybind
 namespace framework {

 class Tensor {
@@ -128,10 +134,6 @@ class Tensor {

   DDim dims() const { return dims_; }

-  platform::Place place() const { return holder_->place(); }
-
-  std::type_index type() const { return holder_->type(); }
-
  private:
   // Placeholder hides type T, so it doesn't appear as a template
   // parameter of Variable.
@@ -186,7 +188,9 @@ class Tensor {
   DDim dims_;
   size_t numel_;   // cache of `product(dims_)`
   size_t offset_;  // marks the begin of tensor data area.
-};  // namespace framework
+  template <bool less, size_t I, typename... ARGS>
+  friend struct paddle::pybind::details::CastToPyBufferImpl;
+};  // namespace framework

 }  // namespace framework
 }  // namespace paddle

diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index e3dc3e718c..0eef36f8ec 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -15,7 +15,7 @@ limitations under the License. */
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -32,8 +32,6 @@ PYBIND11_PLUGIN(core) {

   py::class_<pd::Tensor>(m, "Tensor", py::buffer_protocol())
       .def_buffer([](pd::Tensor& self) -> py::buffer_info {
-        PADDLE_ENFORCE(paddle::platform::is_cpu_place(self.place()),
-                       "Only CPU tensor can cast to numpy array");
         return paddle::pybind::CastToPyBuffer(self);
       })
       .def("get_dims",

diff --git a/paddle/pybind/tensor.h b/paddle/pybind/tensor_bind.h
similarity index 84%
rename from paddle/pybind/tensor.h
rename to paddle/pybind/tensor_bind.h
index ef07144ad4..b96516643a 100644
--- a/paddle/pybind/tensor.h
+++ b/paddle/pybind/tensor_bind.h
@@ -40,7 +40,10 @@ template <size_t I, typename... ARGS>
 struct CastToPyBufferImpl<true, I, ARGS...> {
   using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type;
   py::buffer_info operator()(framework::Tensor &tensor) {
-    if (std::type_index(typeid(CUR_TYPE)) == tensor.type()) {
+    PADDLE_ENFORCE(paddle::platform::is_cpu_place(tensor.holder_->place()),
+                   "Only CPU tensor can cast to numpy array");
+
+    if (std::type_index(typeid(CUR_TYPE)) == tensor.holder_->type()) {
       auto dim_vec = framework::vectorize(tensor.dims());
       std::vector<size_t> dims_outside;
       std::vector<size_t> strides;
@@ -54,12 +57,13 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
         prod *= dims_outside[i - 1];
       }

-      return py::buffer_info(tensor.mutable_data<CUR_TYPE>(tensor.place()),
-                             sizeof(CUR_TYPE),
-                             py::format_descriptor<CUR_TYPE>::format(),
-                             (size_t)framework::arity(tensor.dims()),
-                             dims_outside,
-                             strides);
+      return py::buffer_info(
+          tensor.mutable_data<CUR_TYPE>(tensor.holder_->place()),
+          sizeof(CUR_TYPE),
+          py::format_descriptor<CUR_TYPE>::format(),
+          (size_t)framework::arity(tensor.dims()),
+          dims_outside,
+          strides);
     } else {
       constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
       return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
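
A minimal sketch of the Python-side flow this series enables, condensing the test_tensor.py added in PATCH 1/3. It only uses names that appear in the diffs above (core.Scope, create_var, get_tensor, set_dims, alloc_float, set, and conversion through the buffer protocol); the variable name and the [2, 3] shape are illustrative, not part of the patches.

    import numpy
    import paddle.v2.framework.core as core

    scope = core.Scope(None)
    var = scope.create_var("example_tensor")   # illustrative variable name
    tensor = var.get_tensor()

    # Allocate float storage on the CPU, then copy a numpy array into the
    # tensor via Tensor.set (PyTensorSetFromArray after PATCH 2/3).
    tensor.set_dims([2, 3])
    tensor.alloc_float()
    tensor.set(numpy.arange(6, dtype="float32").reshape(2, 3))

    # The buffer protocol (def_buffer / CastToPyBuffer) lets numpy read the
    # tensor's CPU memory back out.
    array = numpy.array(tensor)
    assert array.shape == (2, 3)
    assert array[1, 2] == 5.0

Note that numpy.array(tensor) copies the data out; writing back into the tensor always goes through Tensor.set, which is exactly what the two tests in test_tensor.py exercise.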