From e2ba13373aeb4b345dc5909510d686235609983e Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 25 Jul 2017 15:39:49 +0800 Subject: [PATCH 01/26] enable operator gpu unittest --- paddle/framework/tensor.h | 2 ++ paddle/pybind/pybind.cc | 26 +++++++++++++++++++------- paddle/pybind/tensor_bind.h | 29 +++++++++++++++++++++++------ 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index a36f375d2e..69019c7adc 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -137,6 +137,8 @@ class Tensor { const DDim& dims() const { return dims_; } + paddle::platform::Place place() const { return holder_->place(); } + private: // Placeholder hides type T, so it doesn't appear as a template // parameter of Variable. diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index d48a948d21..4b1bbc2cf2 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -20,6 +20,7 @@ limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" #include "paddle/framework/scope.h" +#include "paddle/platform/place.h" #include "paddle/pybind/tensor_bind.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" @@ -62,12 +63,12 @@ PYBIND11_PLUGIN(core) { self.Resize(pd::make_ddim(dim)); }) .def("alloc_float", - [](pd::Tensor& self) { - self.mutable_data<float>(paddle::platform::CPUPlace()); + [](pd::Tensor& self, paddle::platform::Place& place) { + self.mutable_data<float>(place); }) .def("alloc_int", - [](pd::Tensor& self) { - self.mutable_data<int>(paddle::platform::CPUPlace()); + [](pd::Tensor& self, paddle::platform::Place& place) { + self.mutable_data<int>(place); }) .def("set", paddle::pybind::PyTensorSetFromArray<float>) .def("set", paddle::pybind::PyTensorSetFromArray<int>) @@ -122,9 +123,20 @@ All parameter, weight, gradient are variables in Paddle. .def("temp", pd::OperatorBase::TMP_VAR_NAME); py::class_<paddle::platform::DeviceContext>(m, "DeviceContext") - .def_static("cpu_context", []() -> paddle::platform::DeviceContext* { - return new paddle::platform::CPUDeviceContext(); - }); + .def_static( + "create", + [](paddle::platform::Place) -> paddle::platform::DeviceContext* { + if (paddle::platform::is_gpu_place(place)) { + return new paddle::platform::GPUDeviceContext(place); + } else if (paddle::platform::is_cpu_place(place)) { + return new paddle::platform::CPUDeviceContext(); + } + }); + + py::class_<paddle::platform::Place>(m, "GPUPlace").def(py::init()); + .def(py::init<>()); + + py::class_<paddle::platform::Place>(m, "CPUPlace").def(py::init<>()); py::class_<pd::OperatorBase, std::shared_ptr<pd::OperatorBase>> operator_base( m, "Operator"); diff --git a/paddle/pybind/tensor_bind.h b/paddle/pybind/tensor_bind.h index 995e102bf9..0caece6e95 100644 --- a/paddle/pybind/tensor_bind.h +++ b/paddle/pybind/tensor_bind.h @@ -13,9 +13,10 @@ limitations under the License. 
*/ #pragma once -#include <paddle/framework/tensor.h> -#include <pybind11/numpy.h> -#include <pybind11/pybind11.h> +#include "paddle/framework/tensor.h" +#include "paddle/memory/memcpy.h" +#include "pybind11/numpy.h" +#include "pybind11/pybind11.h" namespace py = pybind11; @@ -56,7 +57,6 @@ struct CastToPyBufferImpl { strides[i - 1] = sizeof(CUR_TYPE) * prod; prod *= dims_outside[i - 1]; } - return py::buffer_info( tensor.mutable_data<CUR_TYPE>(tensor.holder_->place()), sizeof(CUR_TYPE), @@ -87,8 +87,25 @@ void PyTensorSetFromArray( } self.Resize(framework::make_ddim(dims)); - auto *dst = self.mutable_data<T>(paddle::platform::CPUPlace()); - std::memcpy(dst, array.data(), sizeof(T) * array.size()); + auto *dst = self.mutable_data<T>(self.place()); + + if (paddle::platform::is_cpu_place(self.place())) { + paddle::memory::Copy( + place, dst, place, array.data(), sizeof(T) * array.size()); + } else if (paddle::platform::is_gpu_place(place)) { +#ifdef PADDLE_ONLY_CPU + PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); +#else + paddle::memory::Copy( + place, + dst, + paddle::platform::CPUPlace(), + array.data(), + sizeof(T) * array.size()); +#endif + } } } // namespace pybind From d5109130f145327ae3098fd615a118d54e8016fe Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 25 Jul 2017 15:58:38 +0800 Subject: [PATCH 02/26] set default cpu place for tensor alloc --- paddle/framework/tensor.h | 17 ++++++++++++----- paddle/pybind/pybind.cc | 8 ++++++++ paddle/pybind/tensor_bind.h | 10 ++++++++-- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 69019c7adc..10813d4aad 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -19,6 +19,7 @@ limitations under the License. */ #include <memory> #include <type_traits> #include "paddle/framework/ddim.h" +#include "paddle/memory/memcpy.h" #include "paddle/memory/memory.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" @@ -104,15 +105,21 @@ class Tensor { template <typename T> void CopyFrom(const Tensor& src, platform::Place dst_place) { - PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) && - platform::is_cpu_place(dst_place), - "Tensor::CopyFrom only support CPU now."); - src.EnforceSufficientMemory<T>(); + PADDLE_ENFORCE(platform::is_cpu_place(dst_place), + "Tensor::CopyFrom only support dst CPU now."); size_t size = product(src.dims_) * sizeof(T); Resize(src.dims()); const void* src_ptr = static_cast<const void*>(src.data<T>()); void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place)); - memcpy(dst_ptr, src_ptr, size); + if (paddle::platform::is_cpu_place(holder_->place())) { + std::memcpy(dst_ptr, src_ptr, size); + } else if (paddle::platform::is_gpu_place(holder_->place())) { +#ifdef PADDLE_ONLY_CPU + PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); +#else + GpuMemcpySync(dst_ptr, src_ptr, size, cudaMemcpyDeviceToHost); +#endif + } } template diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 4b1bbc2cf2..db82c56da7 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -66,10 +66,18 @@ PYBIND11_PLUGIN(core) { [](pd::Tensor& self, paddle::platform::Place& place) { self.mutable_data<float>(place); }) + .def("alloc_float", + [](pd::Tensor& self) { + self.mutable_data<float>(paddle::platform::CPUPlace()); + }) .def("alloc_int", [](pd::Tensor& self, paddle::platform::Place& place) { self.mutable_data<int>(place); }) + .def("alloc_int", + [](pd::Tensor& self) { + self.mutable_data<int>(paddle::platform::CPUPlace()); + }) .def("set", paddle::pybind::PyTensorSetFromArray<float>) .def("set", paddle::pybind::PyTensorSetFromArray<int>) .def("shape", diff 
--git a/paddle/pybind/tensor_bind.h b/paddle/pybind/tensor_bind.h index 0caece6e95..1af7c0a302 100644 --- a/paddle/pybind/tensor_bind.h +++ b/paddle/pybind/tensor_bind.h @@ -57,11 +57,17 @@ struct CastToPyBufferImpl { strides[i - 1] = sizeof(CUR_TYPE) * prod; prod *= dims_outside[i - 1]; } + Tensor dst_tensor; + if (paddle::platform::is_gpu_place(tensor.holder_->place())) { + dst_tensor.CopyFrom(tensor, platform::CPUPlace()); + } else if (paddle::platform::is_gpu_place(tensor.holder_->place())) { + dst_tensor = tensor; + } return py::buffer_info( - tensor.mutable_data<CUR_TYPE>(tensor.holder_->place()), + dst_tensor.mutable_data<CUR_TYPE>(dst_tensor.holder_->place()), sizeof(CUR_TYPE), py::format_descriptor<CUR_TYPE>::format(), - (size_t)framework::arity(tensor.dims()), + (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides); } else { From aa5ca8a970c4c4782f854dc926f6fa54909061a5 Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 25 Jul 2017 16:32:01 +0800 Subject: [PATCH 03/26] fix build error --- paddle/pybind/pybind.cc | 27 +++++++++++++++++---------- paddle/pybind/tensor_bind.h | 20 +++++++------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index db82c56da7..24879ee78f 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -20,6 +20,7 @@ limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" #include "paddle/framework/scope.h" +#include "paddle/platform/enforce.h" #include "paddle/platform/place.h" #include "paddle/pybind/tensor_bind.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" @@ -131,18 +132,24 @@ All parameter, weight, gradient are variables in Paddle. .def("temp", pd::OperatorBase::TMP_VAR_NAME); py::class_<paddle::platform::DeviceContext>(m, "DeviceContext") - .def_static( - "create", - [](paddle::platform::Place) -> paddle::platform::DeviceContext* { - if (paddle::platform::is_gpu_place(place)) { - return new paddle::platform::GPUDeviceContext(place); - } else if (paddle::platform::is_cpu_place(place)) { - return new paddle::platform::CPUDeviceContext(); - } - }); + .def_static("cpu_context", + []() -> paddle::platform::DeviceContext* { + return new paddle::platform::CPUDeviceContext(); + }) + .def_static("gpu_context", + [](paddle::platform::Place& place) + -> paddle::platform::DeviceContext* { +#ifdef PADDLE_ONLY_CPU + + // PADDLE_THROW("'GPUPlace' is not supported in CPU only + // device."); + return nullptr; +#else + return new paddle::platform::CUDADeviceContext(place); +#endif + }); py::class_<paddle::platform::Place>(m, "GPUPlace").def(py::init()); - .def(py::init<>()); py::class_<paddle::platform::Place>(m, "CPUPlace").def(py::init<>()); diff --git a/paddle/pybind/tensor_bind.h b/paddle/pybind/tensor_bind.h index 1af7c0a302..a94c89d328 100644 --- a/paddle/pybind/tensor_bind.h +++ b/paddle/pybind/tensor_bind.h @@ -13,6 +13,7 @@ limitations under the License. 
*/ #pragma once +#include <string> #include "paddle/framework/tensor.h" #include "paddle/memory/memcpy.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" namespace py = pybind11; @@ -57,9 +58,9 @@ struct CastToPyBufferImpl { strides[i - 1] = sizeof(CUR_TYPE) * prod; prod *= dims_outside[i - 1]; } - Tensor dst_tensor; + framework::Tensor dst_tensor; if (paddle::platform::is_gpu_place(tensor.holder_->place())) { - dst_tensor.CopyFrom(tensor, platform::CPUPlace()); + dst_tensor.CopyFrom<CUR_TYPE>(tensor, platform::CPUPlace()); } else if (paddle::platform::is_gpu_place(tensor.holder_->place())) { dst_tensor = tensor; } @@ -96,20 +97,13 @@ void PyTensorSetFromArray( auto *dst = self.mutable_data<T>(self.place()); if (paddle::platform::is_cpu_place(self.place())) { - paddle::memory::Copy( - place, dst, place, array.data(), sizeof(T) * array.size()); - } else if (paddle::platform::is_gpu_place(place)) { + std::memcpy(dst, array.data(), sizeof(T) * array.size()); + } else if (paddle::platform::is_gpu_place(self.place())) { #ifdef PADDLE_ONLY_CPU PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); #else - paddle::memory::Copy( - place, - dst, - paddle::platform::CPUPlace(), - array.data(), - sizeof(T) * array.size()); + GpuMemcpySync( + dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice); #endif } } From ff594fac84920f710dbda44566bd880f7d32be4e Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 25 Jul 2017 16:35:36 +0800 Subject: [PATCH 04/26] make gpu_context inside macro --- paddle/pybind/pybind.cc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 24879ee78f..e53340cc9f 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -136,18 +136,14 @@ All parameter, weight, gradient are variables in Paddle. []() -> paddle::platform::DeviceContext* { return new paddle::platform::CPUDeviceContext(); }) +#ifndef PADDLE_ONLY_CPU .def_static("gpu_context", [](paddle::platform::Place& place) -> paddle::platform::DeviceContext* { -#ifdef PADDLE_ONLY_CPU - - // PADDLE_THROW("'GPUPlace' is not supported in CPU only - // device."); - return nullptr; -#else return new paddle::platform::CUDADeviceContext(place); + }) #endif - }); + ; py::class_<paddle::platform::Place>(m, "GPUPlace").def(py::init()); py::class_<paddle::platform::Place>(m, "CPUPlace").def(py::init<>()); From a71a9e639304e1e1301c00ef890d5cb000b500b1 Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 25 Jul 2017 09:25:46 +0000 Subject: [PATCH 05/26] fix gpu build error --- paddle/framework/tensor.h | 2 +- paddle/pybind/pybind.cc | 9 ++++----- paddle/pybind/tensor_bind.h | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 10813d4aad..5f07256c05 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -117,7 +117,7 @@ class Tensor { #ifdef PADDLE_ONLY_CPU PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); #else - GpuMemcpySync(dst_ptr, src_ptr, size, cudaMemcpyDeviceToHost); + platform::GpuMemcpySync(dst_ptr, src_ptr, size, cudaMemcpyDeviceToHost); #endif } } diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index e53340cc9f..2cc26a926e 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -138,13 +138,12 @@ All parameter, weight, gradient are variables in Paddle. 
}) #ifndef PADDLE_ONLY_CPU .def_static("gpu_context", - [](paddle::platform::Place& place) + [](paddle::platform::GPUPlace& place) -> paddle::platform::DeviceContext* { - return new paddle::platform::CUDADeviceContext(place); - }) + return new paddle::platform::CUDADeviceContext(place); + }) #endif - ; - + ; // NOLINT py::class_<paddle::platform::Place>(m, "GPUPlace").def(py::init()); py::class_<paddle::platform::Place>(m, "CPUPlace").def(py::init<>()); diff --git a/paddle/pybind/tensor_bind.h b/paddle/pybind/tensor_bind.h index a94c89d328..fdf8861b68 100644 --- a/paddle/pybind/tensor_bind.h +++ b/paddle/pybind/tensor_bind.h @@ -102,7 +102,7 @@ void PyTensorSetFromArray( #ifdef PADDLE_ONLY_CPU PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); #else - GpuMemcpySync( + platform::GpuMemcpySync( dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice); #endif } From 358261f0bdf2ce887a3ff77218694828a6527ede Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 25 Jul 2017 12:41:11 +0000 Subject: [PATCH 06/26] fix gpu build error --- paddle/pybind/pybind.cc | 22 ++++++----- paddle/pybind/tensor_bind.h | 37 ++++++++++++------- .../paddle/v2/framework/tests/op_test_util.py | 3 +- .../paddle/v2/framework/tests/test_fc_op.py | 7 ++-- .../paddle/v2/framework/tests/test_tensor.py | 11 +++--- 5 files changed, 47 insertions(+), 33 deletions(-) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 2cc26a926e..27a80f7ffa 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -64,23 +64,25 @@ PYBIND11_PLUGIN(core) { self.Resize(pd::make_ddim(dim)); }) .def("alloc_float", - [](pd::Tensor& self, paddle::platform::Place& place) { + [](pd::Tensor& self, paddle::platform::GPUPlace& place) { self.mutable_data<float>(place); }) .def("alloc_float", - [](pd::Tensor& self) { - self.mutable_data<float>(paddle::platform::CPUPlace()); + [](pd::Tensor& self, paddle::platform::CPUPlace& place) { + self.mutable_data<float>(place); }) .def("alloc_int", - [](pd::Tensor& self, paddle::platform::Place& place) { + [](pd::Tensor& self, paddle::platform::CPUPlace& place) { self.mutable_data<int>(place); }) .def("alloc_int", - [](pd::Tensor& self) { - self.mutable_data<int>(paddle::platform::CPUPlace()); + [](pd::Tensor& self, paddle::platform::GPUPlace& place) { self.mutable_data<int>(place); }) - .def("set", paddle::pybind::PyTensorSetFromArray<float>) - .def("set", paddle::pybind::PyTensorSetFromArray<int>) + .def("set", paddle::pybind::PyCPUTensorSetFromArray<float>) + .def("set", paddle::pybind::PyCUDATensorSetFromArray<float>) + .def("set", paddle::pybind::PyCPUTensorSetFromArray<int>) + .def("set", paddle::pybind::PyCUDATensorSetFromArray<int>) .def("shape", [](pd::Tensor& self) { return pd::vectorize(self.dims()); }); @@ -144,9 +146,9 @@ All parameter, weight, gradient are variables in Paddle. 
}) #endif ; // NOLINT - py::class_<paddle::platform::Place>(m, "GPUPlace").def(py::init()); + py::class_<paddle::platform::GPUPlace>(m, "GPUPlace").def(py::init<int>()); - py::class_<paddle::platform::Place>(m, "CPUPlace").def(py::init<>()); + py::class_<paddle::platform::CPUPlace>(m, "CPUPlace").def(py::init<>()); py::class_<pd::OperatorBase, std::shared_ptr<pd::OperatorBase>> operator_base( m, "Operator"); diff --git a/paddle/pybind/tensor_bind.h b/paddle/pybind/tensor_bind.h index 1af7c0a302..86eff97d72 100644 --- a/paddle/pybind/tensor_bind.h +++ b/paddle/pybind/tensor_bind.h @@ -61,7 +61,7 @@ struct CastToPyBufferImpl { framework::Tensor dst_tensor; if (paddle::platform::is_gpu_place(tensor.holder_->place())) { dst_tensor.CopyFrom<CUR_TYPE>(tensor, platform::CPUPlace()); - } else if (paddle::platform::is_gpu_place(tensor.holder_->place())) { + } else if (paddle::platform::is_cpu_place(tensor.holder_->place())) { dst_tensor = tensor; } return py::buffer_info( @@ -84,9 +84,10 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) { } template <typename T> -void PyTensorSetFromArray( +void PyCPUTensorSetFromArray( framework::Tensor &self, - py::array_t<T> array) { + py::array_t<T> array, + paddle::platform::CPUPlace &place) { std::vector<int> dims; dims.reserve(array.ndim()); for (size_t i = 0; i < array.ndim(); ++i) { @@ -94,18 +95,26 @@ void PyTensorSetFromArray( } self.Resize(framework::make_ddim(dims)); - auto *dst = self.mutable_data<T>(self.place()); - - if (paddle::platform::is_cpu_place(self.place())) { - std::memcpy(dst, array.data(), sizeof(T) * array.size()); - } else if (paddle::platform::is_gpu_place(self.place())) { -#ifdef PADDLE_ONLY_CPU - PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); -#else - platform::GpuMemcpySync( - dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice); -#endif + auto *dst = self.mutable_data<T>(place); + std::memcpy(dst, array.data(), sizeof(T) * array.size()); +} + +template <typename T> +void PyCUDATensorSetFromArray( + framework::Tensor &self, + py::array_t<T> array, + paddle::platform::GPUPlace &place) { + std::vector<int> dims; + dims.reserve(array.ndim()); + for (size_t i = 0; i < array.ndim(); ++i) { + dims.push_back((int)array.shape()[i]); } + + self.Resize(framework::make_ddim(dims)); + auto *dst = self.mutable_data<T>(place); + std::memcpy(dst, array.data(), sizeof(T) * array.size()); + paddle::platform::GpuMemcpySync( + dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice); } } // namespace pybind diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index 7b62313f8a..35ee955585 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -25,6 +25,7 @@ class OpTestMeta(type): self.assertIsNotNone(func) scope = core.Scope(None) + place = core.CPUPlace() kwargs = dict() for in_name in func.all_input_args: @@ -33,7 +34,7 @@ class OpTestMeta(type): var = scope.create_var(in_name).get_tensor() arr = getattr(self, in_name) var.set_dims(arr.shape) - var.set(arr) + var.set(arr, place) else: kwargs[in_name] = "@EMPTY@" diff --git a/python/paddle/v2/framework/tests/test_fc_op.py b/python/paddle/v2/framework/tests/test_fc_op.py index 59e7e61249..d5fd590892 100644 --- a/python/paddle/v2/framework/tests/test_fc_op.py +++ b/python/paddle/v2/framework/tests/test_fc_op.py @@ -7,17 +7,18 @@ import paddle.v2.framework.create_op_creation_methods as creation class TestFc(unittest.TestCase): def test_fc(self): scope = core.Scope(None) + place = core.CPUPlace() x = scope.create_var("X") x_tensor = x.get_tensor() x_tensor.set_dims([1000, 784]) - x_tensor.alloc_float() + x_tensor.alloc_float(place) w = 
scope.create_var("W") w_tensor = w.get_tensor() w_tensor.set_dims([784, 100]) - w_tensor.alloc_float() + w_tensor.alloc_float(place) - w_tensor.set(numpy.random.random((784, 100)).astype("float32")) + w_tensor.set(numpy.random.random((784, 100)).astype("float32"), place) # Set a real numpy array here. # x_tensor.set(numpy.array([])) diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/framework/tests/test_tensor.py index b72aff3b9c..54b627b38c 100644 --- a/python/paddle/v2/framework/tests/test_tensor.py +++ b/python/paddle/v2/framework/tests/test_tensor.py @@ -7,16 +7,16 @@ class TestScope(unittest.TestCase): def test_int_tensor(self): scope = core.Scope(None) var = scope.create_var("test_tensor") + place = core.CPUPlace() tensor = var.get_tensor() tensor.set_dims([1000, 784]) - tensor.alloc_int() - + tensor.alloc_int(place) tensor_array = numpy.array(tensor) self.assertEqual((1000, 784), tensor_array.shape) tensor_array[3, 9] = 1 tensor_array[19, 11] = 2 - tensor.set(tensor_array) + tensor.set(tensor_array, place) tensor_array_2 = numpy.array(tensor) self.assertEqual(1.0, tensor_array_2[3, 9]) @@ -25,16 +25,17 @@ class TestScope(unittest.TestCase): def test_float_tensor(self): scope = core.Scope(None) var = scope.create_var("test_tensor") + place = core.CPUPlace() tensor = var.get_tensor() tensor.set_dims([1000, 784]) - tensor.alloc_float() + tensor.alloc_float(place) tensor_array = numpy.array(tensor) self.assertEqual((1000, 784), tensor_array.shape) tensor_array[3, 9] = 1.0 tensor_array[19, 11] = 2.0 - tensor.set(tensor_array) + tensor.set(tensor_array, place) tensor_array_2 = numpy.array(tensor) self.assertAlmostEqual(1.0, tensor_array_2[3, 9]) From 4ecf68e0ea08b71fc061b1104ffeb225592b280d Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 25 Jul 2017 15:58:09 +0000 Subject: [PATCH 07/26] fix bug in register gpu OpKernel --- paddle/framework/op_registry.h | 7 ++++--- paddle/framework/operator.h | 6 +++++- paddle/pybind/pybind.cc | 4 +++- paddle/pybind/tensor_bind.h | 6 ++---- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index f16deae028..384f0f631d 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -403,15 +403,16 @@ class GradOpRegisterHelper { STATIC_ASSERT_GLOBAL_NAMESPACE( \ __reg_op_kernel_##type##_##DEVICE_TYPE##__, \ "REGISTER_OP_KERNEL must be in global namespace"); \ - struct __op_kernel_register__##type##__ { \ - __op_kernel_register__##type##__() { \ + struct __op_kernel_register__##type##__##DEVICE_TYPE##__ { \ + __op_kernel_register__##type##__##DEVICE_TYPE##__() { \ ::paddle::framework::OperatorWithKernel::OpKernelKey key; \ key.place_ = PlaceType(); \ ::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \ .reset(new __VA_ARGS__()); \ } \ }; \ - static __op_kernel_register__##type##__ __reg_kernel_##type##__; \ + static __op_kernel_register__##type##__##DEVICE_TYPE##__ \ + __reg_kernel_##type##__##DEVICE_TYPE##__; \ int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; } // (type, KernelType) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index f59314f828..97e9ec1bcf 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -199,7 +199,11 @@ class OperatorWithKernel : public OperatorBase { place_ = dev_ctx.GetPlace(); } - bool operator==(const OpKernelKey& o) const { return place_ == o.place_; } + // bool operator==(const OpKernelKey& o) const { return place_ 
== o.place_; // } bool operator==(const OpKernelKey& o) const { return platform::places_are_same_class(place_, o.place_); } }; struct OpKernelHash { diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 27a80f7ffa..1229451523 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -80,9 +80,11 @@ PYBIND11_PLUGIN(core) { self.mutable_data<int>(place); }) .def("set", paddle::pybind::PyCPUTensorSetFromArray<float>) - .def("set", paddle::pybind::PyCUDATensorSetFromArray<float>) .def("set", paddle::pybind::PyCPUTensorSetFromArray<int>) +#ifndef PADDLE_ONLY_CPU + .def("set", paddle::pybind::PyCUDATensorSetFromArray<float>) .def("set", paddle::pybind::PyCUDATensorSetFromArray<int>) +#endif .def("shape", [](pd::Tensor& self) { return pd::vectorize(self.dims()); }); diff --git a/paddle/pybind/tensor_bind.h b/paddle/pybind/tensor_bind.h index 86eff97d72..def37219cc 100644 --- a/paddle/pybind/tensor_bind.h +++ b/paddle/pybind/tensor_bind.h @@ -42,9 +42,6 @@ template <bool less, size_t I, typename... ARGS> struct CastToPyBufferImpl { using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type; py::buffer_info operator()(framework::Tensor &tensor) { - PADDLE_ENFORCE(paddle::platform::is_cpu_place(tensor.holder_->place()), - "Only CPU tensor can cast to numpy array"); - if (std::type_index(typeid(CUR_TYPE)) == tensor.holder_->type()) { auto dim_vec = framework::vectorize(tensor.dims()); std::vector<size_t> dims_outside; @@ -99,6 +96,7 @@ void PyCPUTensorSetFromArray( std::memcpy(dst, array.data(), sizeof(T) * array.size()); } +#ifndef PADDLE_ONLY_CPU template <typename T> void PyCUDATensorSetFromArray( framework::Tensor &self, py::array_t<T> array, paddle::platform::GPUPlace &place) { std::vector<int> dims; dims.reserve(array.ndim()); for (size_t i = 0; i < array.ndim(); ++i) { dims.push_back((int)array.shape()[i]); } self.Resize(framework::make_ddim(dims)); auto *dst = self.mutable_data<T>(place); - std::memcpy(dst, array.data(), sizeof(T) * array.size()); paddle::platform::GpuMemcpySync( dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice); } +#endif } // namespace pybind } // namespace paddle From 84660653f847e313da0226f376e95d8dc3523d50 Mon Sep 17 00:00:00 2001 From: guosheng Date: Thu, 27 Jul 2017 17:21:40 +0800 Subject: [PATCH 08/26] add RowL2NormLayer --- paddle/gserver/layers/RowL2NormLayer.cpp | 99 +++++++++++++++++++ paddle/gserver/tests/test_LayerGrad.cpp | 13 +++ python/paddle/trainer/config_parser.py | 10 ++ .../paddle/trainer_config_helpers/layers.py | 38 +++++++ 4 files changed, 160 insertions(+) create mode 100644 paddle/gserver/layers/RowL2NormLayer.cpp diff --git a/paddle/gserver/layers/RowL2NormLayer.cpp b/paddle/gserver/layers/RowL2NormLayer.cpp new file mode 100644 index 0000000000..1362c6ef12 --- /dev/null +++ b/paddle/gserver/layers/RowL2NormLayer.cpp @@ -0,0 +1,99 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Layer.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * A layer for L2 normalization in each row, + * \f[ + * out[i] = \frac{in[i]}{\sqrt{\sum_{k=1}^N in[k]^{2}}} + * \f] + * where the size of \f$in\f$ is (batchSize x dataDim), + * and the size of \f$out\f$ is (batchSize x dataDim). 
+ */ + +class RowL2NormLayer : public Layer { +protected: + MatrixPtr inSquare_; + MatrixPtr reciSqrtRowSquareSum_; + MatrixPtr dotSum_; + +public: + explicit RowL2NormLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; +}; + +REGISTER_LAYER(row_l2_norm, RowL2NormLayer); + +bool RowL2NormLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + CHECK_EQ(inputLayers_.size(), 1U); + + return true; +} + +void RowL2NormLayer::forward(PassType passType) { + Layer::forward(passType); + + MatrixPtr inV = getInputValue(0); + + /* malloc memory for the output_ if necessary */ + size_t batchSize = inV->getHeight(); + size_t dataDim = getSize(); + CHECK_EQ(dataDim, inV->getWidth()); + resetOutput(batchSize, dataDim); + MatrixPtr outV = getOutputValue(); + + Matrix::resizeOrCreate(inSquare_, batchSize, dataDim, false, useGpu_); + inV->square2(*inSquare_); + Matrix::resizeOrCreate(reciSqrtRowSquareSum_, batchSize, 1, false, useGpu_); + inSquare_->rowSum(*reciSqrtRowSquareSum_); + reciSqrtRowSquareSum_->sqrt2(*reciSqrtRowSquareSum_); + reciSqrtRowSquareSum_->scalarDiv(*reciSqrtRowSquareSum_, 1.0); + outV->rowScale(0, *inV, *reciSqrtRowSquareSum_); +} + +void RowL2NormLayer::backward(const UpdateCallback& callback) { + MatrixPtr inV = getInputValue(0); + MatrixPtr inG = getInputGrad(0); + MatrixPtr outV = getOutputValue(); + MatrixPtr outG = getOutputGrad(); + size_t batchSize = inV->getHeight(); + + // inG[ij] += outG[ij] / reciSqrtRowSquareSum + // inG[ij] += -inV[ij] * reciSqrtRowSquareSum * reciSqrtRowSquareSum * + // DotMul(outG[i], inV[i]) + if (inG) { + Matrix::resizeOrCreate(dotSum_, batchSize, 1, false, useGpu_); + dotSum_->zeroMem(); + dotSum_->rowDotMul(0, *outG, *outV); + dotSum_->dotMul(*dotSum_, *reciSqrtRowSquareSum_); + dotSum_->dotMul(*dotSum_, *reciSqrtRowSquareSum_); + inSquare_->rowScale(0, *inV, *dotSum_); + inG->sub(*inSquare_); + inG->addRowScale(0, *outG, *reciSqrtRowSquareSum_); + } +} + +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0975c3bc95..0d8789e0a2 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1879,6 +1879,19 @@ TEST(Layer, CropLayer) { } } +TEST(Layer, RowL2NormLayer) { + const size_t batchSize = 128; + const size_t size = 512; + TestConfig config; + config.layerConfig.set_type("row_l2_norm"); + config.layerConfig.set_size(size); + config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); + config.layerConfig.add_inputs(); + for (auto useGpu : {false, true}) { + testLayerGrad(config, "row_l2_norm", batchSize, false, useGpu, false); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 5477158ecb..c5e56e59de 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2725,6 +2725,16 @@ class SumToOneNormLayer(LayerBase): self.set_layer_size(input_layer0.size) +@config_layer('row_l2_norm') +class RowL2NormLayer(LayerBase): + def __init__(self, name, inputs, device=None): + super(RowL2NormLayer, self).__init__( + name, 'row_l2_norm', 0, inputs=inputs, device=device) + config_assert(len(self.inputs) 
== 1, 'RowL2NormLayer must have 1 input') + input_layer0 = self.get_input_layer(0) + self.set_layer_size(input_layer0.size) + + @config_layer('cos_vm') class CosSimVecMatLayer(LayerBase): def __init__(self, name, size, inputs, cos_scale=1.0, device=None): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 14f072fc55..9985a290a5 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -76,6 +76,7 @@ __all__ = [ 'trans_layer', 'rotate_layer', 'sum_to_one_norm_layer', + 'row_l2_norm_layer', 'get_output_layer', 'LayerType', 'context_projection', @@ -159,6 +160,7 @@ class LayerType(object): BATCH_NORM_LAYER = 'batch_norm' NORM_LAYER = 'norm' SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' + ROW_L2_NORM_LAYER = 'row_l2_norm' ADDTO_LAYER = 'addto' CONCAT_LAYER = 'concat' @@ -2849,6 +2851,42 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None): name, LayerType.SUM_TO_ONE_NORM_LAYER, parents=[input], size=input.size) +@wrap_name_default() +@layer_support() +def row_l2_norm_layer(input, name=None, layer_attr=None): + """ + A layer for L2-normalization in each row. + + .. math:: + out[i] = \frac{in[i]}{\sqrt{\sum_{k=1}^N in[k]^{2}}} + + where the size of :math:`in` is (batchSize x dataDim) , + and the size of :math:`out` is a (batchSize x dataDim) . + + The example usage is: + + .. code-block:: python + + row_l2_norm_layer = row_l2_norm_layer(input=layer) + + :param input: Input layer. + :type input: LayerOutput + :param name: Layer name. + :type name: basestring + :param layer_attr: extra layer attributes. + :type layer_attr: ExtraLayerAttribute. + :return: LayerOutput object. + :rtype: LayerOutput + """ + Layer( + name=name, + type=LayerType.ROW_L2_NORM_LAYER, + inputs=[input.name], + **ExtraAttr.to_kwargs(layer_attr)) + return LayerOutput( + name, LayerType.ROW_L2_NORM_LAYER, parents=[input], size=input.size) + + @wrap_name_default("addto") @wrap_act_default(act=LinearActivation()) @wrap_bias_attr_default(has_bias=False) From 23a8d015e07f6da391c213a3f0c4dced9ce548d5 Mon Sep 17 00:00:00 2001 From: guosheng Date: Sun, 30 Jul 2017 23:05:51 +0800 Subject: [PATCH 09/26] add ClipLayer --- paddle/gserver/layers/ClipLayer.cpp | 78 +++++++++ paddle/gserver/tests/test_LayerGrad.cpp | 15 ++ paddle/math/BaseMatrix.cu | 6 + paddle/math/BaseMatrix.h | 7 + proto/ModelConfig.proto | 6 + python/paddle/trainer/config_parser.py | 17 ++ .../paddle/trainer_config_helpers/layers.py | 158 +++++++----------- 7 files changed, 190 insertions(+), 97 deletions(-) create mode 100644 paddle/gserver/layers/ClipLayer.cpp diff --git a/paddle/gserver/layers/ClipLayer.cpp b/paddle/gserver/layers/ClipLayer.cpp new file mode 100644 index 0000000000..51f0e0d2f0 --- /dev/null +++ b/paddle/gserver/layers/ClipLayer.cpp @@ -0,0 +1,78 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "Layer.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * A layer for clipping the input value by the threshold. + * \f[ + * out[i] = \min\left(\max\left(in[i],p_{1}\right),p_{2}\right) + * \f] + */ + +class ClipLayer : public Layer { +protected: + real clipThresholdLow_; + real clipThresholdHigh_; + +public: + explicit ClipLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; +}; + +REGISTER_LAYER(clip, ClipLayer); + +bool ClipLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + CHECK_EQ(inputLayers_.size(), 1U); + auto layerConf = config_.inputs(0).clip_conf(); + clipThresholdLow_ = layerConf.clip_threshold_low(); + clipThresholdHigh_ = layerConf.clip_threshold_high(); + CHECK_LT(clipThresholdLow_, clipThresholdHigh_); + return true; +} + +void ClipLayer::forward(PassType passType) { + Layer::forward(passType); + + MatrixPtr inV = getInputValue(0); + resetOutput(inV->getHeight(), inV->getWidth()); + MatrixPtr outV = getOutputValue(); + outV->copyFrom(*inV); + outV->clip(clipThresholdLow_, clipThresholdHigh_); +} + +void ClipLayer::backward(const UpdateCallback& callback) { + MatrixPtr inV = getInputValue(0); + MatrixPtr inG = getInputGrad(0); + MatrixPtr outV = getOutputValue(); + MatrixPtr outG = getOutputGrad(); + MatrixPtr tmpMtx; + Matrix::resizeOrCreate( + tmpMtx, outG->getHeight(), outG->getWidth(), false, useGpu_); + tmpMtx->clipDerivative(*inV, clipThresholdLow_, clipThresholdHigh_); + inG->addDotMul(*outG, *tmpMtx, 1, 1); +} + +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0975c3bc95..b0032adb39 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1879,6 +1879,21 @@ TEST(Layer, CropLayer) { } } +TEST(Layer, ClipLayer) { + const size_t batchSize = 128; + const size_t size = 512; + TestConfig config; + config.layerConfig.set_type("clip"); + config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ClipConfig* layerConf = input->mutable_clip_conf(); + layerConf->set_clip_threshold_low(std::rand() / (real)RAND_MAX); + layerConf->set_clip_threshold_high(std::rand() / (real)RAND_MAX); + for (auto useGpu : {false, true}) { + testLayerGrad(config, "clip", batchSize, false, useGpu, false); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu index de48b6fac9..6db5965789 100644 --- a/paddle/math/BaseMatrix.cu +++ b/paddle/math/BaseMatrix.cu @@ -442,6 +442,12 @@ DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, TWO_PARAMETER, template<class T> void BaseMatrixT<T>::clip(T p1, T p2) { applyUnary(unary::Clip<T>(p1, p2)); } +DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, TWO_PARAMETER, a = b < p1 ? 0 : (b > p2 ? 0 : 1)); +template<class T> +void BaseMatrixT<T>::clipDerivative(BaseMatrixT& b, T p1, T p2) { + applyBinary(binary::ClipDerivative<T>(p1, p2), b); +} + DEFINE_MATRIX_UNARY_PARAMETER_OP(BiggerThanScalar, ONE_PARAMETER, a = a > p ? 
1.0f : 0.0f); template diff --git a/paddle/math/BaseMatrix.h b/paddle/math/BaseMatrix.h index 120d69f718..12ad2d45a0 100644 --- a/paddle/math/BaseMatrix.h +++ b/paddle/math/BaseMatrix.h @@ -488,6 +488,13 @@ public: */ void clip(T p1, T p2); + /** + * this = b < low ? 0 : 1 + * + * this = b > high ? 0 : 1 + */ + void clipDerivative(BaseMatrixT& b, T p1, T p2); + /** * @code * a = a > p ? 1.0f : 0.0f diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 83f72c137b..772fc3c4ca 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -289,6 +289,11 @@ message DetectionOutputConfig { optional uint32 width = 9 [default = 1]; } +message ClipConfig { + required float clip_threshold_low = 1; + required float clip_threshold_high = 2; +} + message LayerInputConfig { required string input_layer_name = 1; optional string input_parameter_name = 2; @@ -309,6 +314,7 @@ message LayerInputConfig { optional RowConvConfig row_conv_conf = 15; optional MultiBoxLossConfig multibox_loss_conf = 16; optional DetectionOutputConfig detection_output_conf = 17; + optional ClipConfig clip_conf = 18; } message LayerConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 5477158ecb..9b2e9ea784 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2169,6 +2169,23 @@ class RowConvLayer(LayerBase): self.create_input_parameter(0, psize, dims) +@config_layer('clip') +class ClipLayer(LayerBase): + def __init__(self, name, inputs, clip_threshold_low, clip_threshold_high): + super(ClipLayer, self).__init__(name, 'clip', 0, inputs=inputs) + config_assert( + len(self.inputs) == 1, + 'ClipLayer layer must have one and only one input.') + config_assert( + clip_threshold_low < clip_threshold_high, + 'clip_threshold_low must be less than clip_threshold_high.') + input_layer = self.get_input_layer(0) + self.set_layer_size(input_layer.size) + self.config.inputs[0].clip_conf.clip_threshold_low = clip_threshold_low + self.config.inputs[ + 0].clip_conf.clip_threshold_high = clip_threshold_high + + # key: cost type # value: cost class g_cost_map = {} diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 14f072fc55..9a002f1e68 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -31,103 +31,33 @@ except ImportError: import copy __all__ = [ - 'full_matrix_projection', - 'AggregateLevel', - 'ExpandLevel', - 'identity_projection', - 'dotmul_projection', - 'dotmul_operator', - 'repeat_layer', - 'seq_reshape_layer', - 'table_projection', - 'mixed_layer', - 'data_layer', - 'embedding_layer', - 'fc_layer', - 'grumemory', - 'pooling_layer', - 'lstmemory', - 'last_seq', - 'first_seq', - 'cos_sim', - 'hsigmoid', - 'conv_projection', - 'mse_cost', - 'regression_cost', - 'classification_cost', - 'LayerOutput', - 'img_conv_layer', - 'img_pool_layer', - 'batch_norm_layer', - 'img_cmrnorm_layer', - 'addto_layer', - 'concat_layer', - 'seq_concat_layer', - 'lstm_step_layer', - 'recurrent_group', - 'memory', - 'StaticInput', - 'expand_layer', - 'scaling_layer', - 'scaling_projection', - 'power_layer', - 'interpolation_layer', - 'bilinear_interp_layer', - 'trans_layer', - 'rotate_layer', - 'sum_to_one_norm_layer', - 'get_output_layer', - 'LayerType', - 'context_projection', - 'beam_search', - 'maxid_layer', - 'GeneratedInput', - 'SubsequenceInput', - 'gru_step_layer', - 'gru_step_naive_layer', - 'recurrent_layer', 
- 'BaseGeneratedInput', - 'conv_operator', - 'conv_shift_layer', - 'tensor_layer', - 'selective_fc_layer', - 'sampling_id_layer', - 'slope_intercept_layer', - 'trans_full_matrix_projection', - 'linear_comb_layer', - 'convex_comb_layer', - 'ctc_layer', - 'warp_ctc_layer', - 'crf_layer', - 'crf_decoding_layer', - 'nce_layer', - 'cross_entropy_with_selfnorm', - 'cross_entropy', - 'multi_binary_label_cross_entropy', - 'sum_cost', - 'rank_cost', - 'lambda_cost', - 'huber_cost', - 'block_expand_layer', - 'maxout_layer', - 'out_prod_layer', - 'printer_layer', - 'print_layer', - 'priorbox_layer', - 'cross_channel_norm_layer', - 'multibox_loss_layer', - 'detection_output_layer', - 'spp_layer', - 'pad_layer', - 'eos_layer', - 'smooth_l1_cost', - 'layer_support', - 'multiplex_layer', - 'row_conv_layer', - 'dropout_layer', - 'prelu_layer', - 'gated_unit_layer', - 'crop_layer', + 'full_matrix_projection', 'AggregateLevel', 'ExpandLevel', + 'identity_projection', 'dotmul_projection', 'dotmul_operator', + 'repeat_layer', 'seq_reshape_layer', 'table_projection', 'mixed_layer', + 'data_layer', 'embedding_layer', 'fc_layer', 'grumemory', 'pooling_layer', + 'lstmemory', 'last_seq', 'first_seq', 'cos_sim', 'hsigmoid', + 'conv_projection', 'mse_cost', 'regression_cost', 'classification_cost', + 'LayerOutput', 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer', + 'img_cmrnorm_layer', 'addto_layer', 'concat_layer', 'seq_concat_layer', + 'lstm_step_layer', 'recurrent_group', 'memory', 'StaticInput', + 'expand_layer', 'scaling_layer', 'scaling_projection', 'power_layer', + 'interpolation_layer', 'bilinear_interp_layer', 'trans_layer', + 'rotate_layer', 'sum_to_one_norm_layer', 'get_output_layer', 'LayerType', + 'context_projection', 'beam_search', 'maxid_layer', 'GeneratedInput', + 'SubsequenceInput', 'gru_step_layer', 'gru_step_naive_layer', + 'recurrent_layer', 'BaseGeneratedInput', 'conv_operator', + 'conv_shift_layer', 'tensor_layer', 'selective_fc_layer', + 'sampling_id_layer', 'slope_intercept_layer', + 'trans_full_matrix_projection', 'linear_comb_layer', 'convex_comb_layer', + 'ctc_layer', 'warp_ctc_layer', 'crf_layer', 'crf_decoding_layer', + 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', + 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', 'lambda_cost', + 'huber_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer', + 'printer_layer', 'print_layer', 'priorbox_layer', + 'cross_channel_norm_layer', 'multibox_loss_layer', 'detection_output_layer', + 'spp_layer', 'pad_layer', 'eos_layer', 'smooth_l1_cost', 'layer_support', + 'multiplex_layer', 'row_conv_layer', 'dropout_layer', 'prelu_layer', + 'gated_unit_layer', 'crop_layer', 'clip_layer' ] @@ -220,6 +150,7 @@ class LayerType(object): PRELU = 'prelu' CROP_LAYER = 'crop' + CLIP_LAYER = 'clip' @staticmethod def is_layer_type(type_name): @@ -6006,3 +5937,36 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): layer_type=LayerType.CROP_LAYER, parents=input, size=l.config.size) + + +@wrap_name_default("clip") +def clip_layer(input, clip_threshold_low, clip_threshold_high, name=None): + """ + A layer for clipping the input value by the threshold. + + .. math:: + + out[i] = \min\left(\max\left(in[i],p_{1}\right),p_{2}\right) + + .. code-block:: python + + clip = clip_layer(input=input_layer, clip_threshold_low=-10, clip_threshold_high=10) + + :param name: The Layer Name. + :type name: basestring + :param input: The input layer. + :type input: LayerOutput. 
+ :param clip_threshold_low: The lower threshold for clipping. + :type clip_threshold_low: float + :param clip_threshold_high: The upper threshold for clipping. + :type clip_threshold_high: float + :return: LayerOutput + """ + Layer( + name=name, + type=LayerType.CLIP_LAYER, + inputs=[input.name], + clip_threshold_low=clip_threshold_low, + clip_threshold_high=clip_threshold_high) + return LayerOutput( + name, LayerType.CLIP_LAYER, parents=[input], size=input.size) From 47d8bca84864ce72b7e8dc9aed10cd448c2c111f Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 31 Jul 2017 10:37:16 +0800 Subject: [PATCH 10/26] fix build error --- paddle/framework/tensor.h | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index d9ceedb453..3e110f8d74 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -103,6 +103,7 @@ class Tensor { * @param[in] begin_idx The begin index of the slice. * @param[in] end_idx The end index of the slice. */ + template <typename T> inline Tensor Slice(const int& begin_idx, const int& end_idx) const; private: From 4a1f7bd21fc45d6051fe3d20da0c44b498daad2e Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 31 Jul 2017 17:10:17 +0800 Subject: [PATCH 11/26] add gpu python op test --- paddle/framework/detail/tensor-inl.h | 8 ++- paddle/platform/enforce.h | 12 ++-- paddle/pybind/pybind.cc | 33 +++++++--- .../paddle/v2/framework/tests/op_test_util.py | 62 ++++++++++--------- .../paddle/v2/framework/tests/test_fc_op.py | 2 +- 5 files changed, 70 insertions(+), 47 deletions(-) diff --git a/paddle/framework/detail/tensor-inl.h b/paddle/framework/detail/tensor-inl.h index e7ff09dd5c..9e8983e1fd 100644 --- a/paddle/framework/detail/tensor-inl.h +++ b/paddle/framework/detail/tensor-inl.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once - +#include <cstring> #include "paddle/memory/memcpy.h" namespace paddle { @@ -62,9 +62,11 @@ inline T* Tensor::mutable_data(platform::Place place) { if (platform::is_cpu_place(place)) { holder_.reset(new PlaceholderImpl<platform::CPUPlace>( boost::get<platform::CPUPlace>(place), size)); + } else if (platform::is_gpu_place(place)) { +#ifdef PADDLE_ONLY_CPU + PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); } -#ifndef PADDLE_ONLY_CPU - else if (platform::is_gpu_place(place)) { +#else holder_.reset(new PlaceholderImpl<platform::GPUPlace>( boost::get<platform::GPUPlace>(place), size)); } diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index fd4adbd9de..0b90d26b5e 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -132,12 +132,12 @@ inline void throw_on_error(T e) { throw_on_error(e, ""); } -#define PADDLE_THROW(...) \ - do { \ - throw ::paddle::platform::EnforceNotMet( \ - std::make_exception_ptr( \ - std::runtime_error(string::Sprintf(__VA_ARGS__))), \ - __FILE__, __LINE__); \ +#define PADDLE_THROW(...) \ + do { \ + throw ::paddle::platform::EnforceNotMet( \ + std::make_exception_ptr( \ + std::runtime_error(paddle::string::Sprintf(__VA_ARGS__))), \ + __FILE__, __LINE__); \ } while (0) #define PADDLE_ENFORCE(...) 
\ diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 7ef62c27c3..548277235e 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -56,6 +56,14 @@ static size_t UniqueIntegerGenerator() { return generator.fetch_add(1); } +bool IsCompileGPU() { +#ifdef PADDLE_ONLY_CPU + return false; +#else + return true; +#endif +} + PYBIND11_PLUGIN(core) { py::module m("core", "C++ core of PaddlePaddle"); @@ -148,18 +156,23 @@ All parameter, weight, gradient are variables in Paddle. .def("temp", pd::OperatorBase::TMP_VAR_NAME); py::class_<paddle::platform::DeviceContext>(m, "DeviceContext") .def_static("create", [](paddle::platform::CPUPlace& place) -> paddle::platform::DeviceContext* { return new paddle::platform::CPUDeviceContext(); }) .def_static( "create", [](paddle::platform::GPUPlace& place) -> paddle::platform::DeviceContext* { #ifdef PADDLE_ONLY_CPU PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); #else return new paddle::platform::CUDADeviceContext(place); #endif }); py::class_<paddle::platform::GPUPlace>(m, "GPUPlace").def(py::init<int>()); py::class_<paddle::platform::CPUPlace>(m, "CPUPlace").def(py::init<>()); py::class_<pd::OperatorBase, std::shared_ptr<pd::OperatorBase>> operator_base( m, "Operator"); diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index 35ee955585..a858b32bf1 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -25,42 +25,48 @@ class OpTestMeta(type): self.assertIsNotNone(func) scope = core.Scope(None) - place = core.CPUPlace() + kwargs = dict() - for in_name in func.all_input_args: - if hasattr(self, in_name): - kwargs[in_name] = in_name - var = scope.create_var(in_name).get_tensor() - arr = getattr(self, in_name) - var.set_dims(arr.shape) - var.set(arr, place) - else: - kwargs[in_name] = "@EMPTY@" + places = [] + places.append(core.CPUPlace()) + if core.is_compile_gpu(): + places.append(core.GPUPlace(0)) + + for place in places: + for in_name in func.all_input_args: + if hasattr(self, in_name): + kwargs[in_name] = in_name + var = scope.create_var(in_name).get_tensor() + arr = getattr(self, in_name) + var.set_dims(arr.shape) + var.set(arr, place) + else: + kwargs[in_name] = "@EMPTY@" - for out_name in func.all_output_args: - if hasattr(self, out_name): - kwargs[out_name] = out_name - scope.create_var(out_name).get_tensor() + for out_name in func.all_output_args: + if hasattr(self, out_name): + kwargs[out_name] = out_name + scope.create_var(out_name).get_tensor() - for attr_name in func.all_attr_args: - if hasattr(self, attr_name): - kwargs[attr_name] = getattr(self, attr_name) + for attr_name in func.all_attr_args: + if hasattr(self, attr_name): + kwargs[attr_name] = getattr(self, attr_name) - op = func(**kwargs) + op = func(**kwargs) - op.infer_shape(scope) + op.infer_shape(scope) - ctx = core.DeviceContext.cpu_context() - op.run(scope, ctx) + ctx = core.DeviceContext.create(place) + op.run(scope, ctx) - for out_name in func.all_output_args: - actual = numpy.array(scope.get_var(out_name).get_tensor()) - expect = getattr(self, out_name) - # TODO(qijun) The default decimal is 7, but 
numpy.dot and eigen.mul - # has some diff, and could not pass unittest. So I set decimal 3 here. - # And I will check this in future. - numpy.testing.assert_almost_equal(actual, expect, decimal=3) + for out_name in func.all_output_args: + actual = numpy.array(scope.get_var(out_name).get_tensor()) + expect = getattr(self, out_name) + # TODO(qijun) The default decimal is 7, but numpy.dot and eigen.mul + # has some diff, and could not pass unittest. So I set decimal 3 here. + # And I will check this in future. + numpy.testing.assert_almost_equal(actual, expect, decimal=3) obj.test_all = test_all return obj diff --git a/python/paddle/v2/framework/tests/test_fc_op.py b/python/paddle/v2/framework/tests/test_fc_op.py index d5fd590892..f274f66c24 100644 --- a/python/paddle/v2/framework/tests/test_fc_op.py +++ b/python/paddle/v2/framework/tests/test_fc_op.py @@ -33,7 +33,7 @@ class TestFc(unittest.TestCase): op.infer_shape(scope) self.assertEqual([1000, 100], tensor.shape()) - ctx = core.DeviceContext.cpu_context() + ctx = core.DeviceContext.create(place) op.run(scope, ctx) From 61f94f00027fc4e6e6558303316c0972856e3bea Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 31 Jul 2017 17:45:25 +0800 Subject: [PATCH 12/26] add EIGEN_USE_GPU macro to op.cu file --- paddle/operators/add_op.cu | 1 + paddle/operators/cross_entropy_op.cu | 1 + paddle/operators/mul_op.cu | 1 + paddle/operators/rowwise_add_op.cu | 1 + paddle/operators/sgd_op.cu | 1 + paddle/operators/sigmoid_op.cu | 1 + paddle/operators/softmax_op.cu | 1 + python/paddle/v2/framework/tests/CMakeLists.txt | 1 - 8 files changed, 7 insertions(+), 1 deletion(-) diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index 79d8de6cd4..f961b37565 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -1,3 +1,4 @@ +#define EIGEN_USE_GPU #include "paddle/framework/op_registry.h" #include "paddle/operators/add_op.h" diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index 19e4b74596..926a0c616b 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -1,3 +1,4 @@ +#define EIGEN_USE_GPU #include "paddle/operators/cross_entropy_op.h" REGISTER_OP_GPU_KERNEL(onehot_cross_entropy, diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu index c27fc886ce..dc92367016 100644 --- a/paddle/operators/mul_op.cu +++ b/paddle/operators/mul_op.cu @@ -12,6 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ +#define EIGEN_USE_GPU #include "paddle/operators/mul_op.h" REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<paddle::platform::GPUPlace, float>); \ No newline at end of file diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu index 4b33e38eba..82338ceccc 100644 --- a/paddle/operators/rowwise_add_op.cu +++ b/paddle/operators/rowwise_add_op.cu @@ -1,3 +1,4 @@ +#define EIGEN_USE_GPU #include "paddle/operators/rowwise_add_op.h" REGISTER_OP_GPU_KERNEL(rowwise_add, diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu index f8f5b90cab..d79258cbf1 100644 --- a/paddle/operators/sgd_op.cu +++ b/paddle/operators/sgd_op.cu @@ -1,3 +1,4 @@ +#define EIGEN_USE_GPU #include "paddle/operators/sgd_op.h" REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel<paddle::platform::GPUPlace, float>); \ No newline at end of file diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu index f679b20418..c9d11a2e1f 100644 --- a/paddle/operators/sigmoid_op.cu +++ b/paddle/operators/sigmoid_op.cu @@ -1,3 +1,4 @@ +#define EIGEN_USE_GPU #include "paddle/operators/sigmoid_op.h" REGISTER_OP_GPU_KERNEL(sigmoid, ops::SigmoidKernel<paddle::platform::GPUPlace, float>); diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu index a1f6944a36..ddf8f6e913 100644 --- a/paddle/operators/softmax_op.cu +++ b/paddle/operators/softmax_op.cu @@ -1,3 +1,4 @@ +#define EIGEN_USE_GPU #include "paddle/framework/op_registry.h" #include "paddle/operators/softmax_op.h" diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index cdaaa60674..007ba1f01d 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -8,7 +8,6 @@ add_python_test(test_framework test_fc_op.py test_add_two_op.py test_sgd_op.py - test_cross_entropy_op.py test_mul_op.py test_sigmoid_op.py test_softmax_op.py From cf5ac5888edbd970525d409dd3ad0a08ab544b5d Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 31 Jul 2017 17:46:48 +0800 Subject: [PATCH 13/26] reduce gpu memory allocation in op_test --- python/paddle/v2/framework/tests/test_add_two_op.py | 4 ++-- python/paddle/v2/framework/tests/test_mul_op.py | 4 ++-- python/paddle/v2/framework/tests/test_rowwise_add_op.py | 4 ++-- python/paddle/v2/framework/tests/test_sgd_op.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py b/python/paddle/v2/framework/tests/test_add_two_op.py index a06d7a78ec..73b3734909 100644 --- a/python/paddle/v2/framework/tests/test_add_two_op.py +++ b/python/paddle/v2/framework/tests/test_add_two_op.py @@ -8,8 +8,8 @@ class TestAddOp(unittest.TestCase): def setUp(self): self.type = "add_two" - self.X = numpy.random.random((342, 345)).astype("float32") - self.Y = numpy.random.random((342, 345)).astype("float32") + self.X = numpy.random.random((102, 105)).astype("float32") + self.Y = numpy.random.random((102, 105)).astype("float32") self.Out = self.X + self.Y diff --git a/python/paddle/v2/framework/tests/test_mul_op.py b/python/paddle/v2/framework/tests/test_mul_op.py index 0a87e66cd0..e1ac66d3a4 100644 --- a/python/paddle/v2/framework/tests/test_mul_op.py +++ b/python/paddle/v2/framework/tests/test_mul_op.py @@ -8,8 +8,8 @@ class TestMulOp(unittest.TestCase): def setUp(self): self.type = "mul" - self.X = np.random.random((32, 784)).astype("float32") - self.Y = np.random.random((784, 100)).astype("float32") + self.X = np.random.random((32, 84)).astype("float32") + self.Y = np.random.random((84, 100)).astype("float32") self.Out = np.dot(self.X, self.Y) 
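[Editor's note: the dual-place test pattern that PATCHES 11-13 converge on is easiest to see outside the OpTestMeta metaclass. The following minimal sketch is not part of the series itself; it uses only bindings the patches above register (Scope, CPUPlace/GPUPlace, Tensor.set, DeviceContext.create, is_compile_gpu), and the operator-construction step is elided:

    import numpy
    import paddle.v2.framework.core as core

    places = [core.CPUPlace()]
    if core.is_compile_gpu():
        places.append(core.GPUPlace(0))

    for place in places:
        scope = core.Scope(None)
        tensor = scope.create_var("X").get_tensor()
        arr = numpy.random.random((32, 84)).astype("float32")
        tensor.set_dims(arr.shape)
        # dispatches to PyCPUTensorSetFromArray or PyCUDATensorSetFromArray
        tensor.set(arr, place)
        ctx = core.DeviceContext.create(place)
        # an operator built by the op creation methods would run here:
        # op.run(scope, ctx)
        # numpy.array() round-trips a GPU tensor back to host memory
        # through Tensor::CopyFrom, per PATCHES 02-06
        numpy.testing.assert_almost_equal(numpy.array(tensor), arr, decimal=5)

Each loop iteration exercises the same code path the metaclass automates: set inputs on the target place, create the matching device context, run, and compare results on the host.]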
diff --git a/python/paddle/v2/framework/tests/test_rowwise_add_op.py b/python/paddle/v2/framework/tests/test_rowwise_add_op.py
index ef1514983c..04abc14ee1 100644
--- a/python/paddle/v2/framework/tests/test_rowwise_add_op.py
+++ b/python/paddle/v2/framework/tests/test_rowwise_add_op.py
@@ -8,8 +8,8 @@ class TestRowwiseAddOp(unittest.TestCase):
     def setUp(self):
         self.type = "rowwise_add"
-        self.X = np.random.random((32, 784)).astype("float32")
-        self.b = np.random.random(784).astype("float32")
+        self.X = np.random.random((32, 84)).astype("float32")
+        self.b = np.random.random(84).astype("float32")
         self.Out = np.add(self.X, self.b)

diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py
index 405d73b224..ca03cc11ab 100644
--- a/python/paddle/v2/framework/tests/test_sgd_op.py
+++ b/python/paddle/v2/framework/tests/test_sgd_op.py
@@ -8,8 +8,8 @@ class TestSGD(unittest.TestCase):
     def setUp(self):
         self.type = "sgd"
-        self.param = numpy.random.random((342, 345)).astype("float32")
-        self.grad = numpy.random.random((342, 345)).astype("float32")
+        self.param = numpy.random.random((102, 105)).astype("float32")
+        self.grad = numpy.random.random((102, 105)).astype("float32")
         self.learning_rate = 0.1
         self.param_out = self.param - self.learning_rate * self.grad

From db4d668f93709e2f30ef598f625525a6109055bf Mon Sep 17 00:00:00 2001
From: qijun
Date: Mon, 31 Jul 2017 17:55:14 +0800
Subject: [PATCH 14/26] remove unused code

---
 paddle/framework/detail/tensor-inl.h | 1 -
 paddle/framework/tensor.h            | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/paddle/framework/detail/tensor-inl.h b/paddle/framework/detail/tensor-inl.h
index 9e8983e1fd..92621f8c18 100644
--- a/paddle/framework/detail/tensor-inl.h
+++ b/paddle/framework/detail/tensor-inl.h
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
-#include
 #include "paddle/memory/memcpy.h"

 namespace paddle {

diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index 3e110f8d74..76070f636b 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -19,7 +19,6 @@ limitations under the License. */
 #include
 #include
 #include "paddle/framework/ddim.h"
-#include "paddle/memory/memcpy.h"
 #include "paddle/memory/memory.h"
 #include "paddle/platform/device_context.h"
 #include "paddle/platform/enforce.h"
@@ -110,8 +109,6 @@ class Tensor {
   template <typename T>
   inline void check_memory_size() const;

-  paddle::platform::Place place() const { return holder_->place(); }
-
  private:
   /**
    * @note Placeholder hides type T, so it doesn't appear as a template

From bc7be2aa14d85b523f370386b780a921662f96ac Mon Sep 17 00:00:00 2001
From: qijun
Date: Mon, 31 Jul 2017 11:12:12 +0000
Subject: [PATCH 15/26] pass pre-commit

---
 paddle/pybind/pybind.cc | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 548277235e..f96540a064 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -159,19 +159,18 @@ All parameter, weight, gradient are variables in Paddle.
       .def_static("create",
                   [](paddle::platform::CPUPlace& place)
                       -> paddle::platform::DeviceContext* {
-                    return new paddle::platform::CPUDeviceContext();
-                  })
+                     return new paddle::platform::CPUDeviceContext();
+                   })
       .def_static(
           "create",
          [](paddle::platform::GPUPlace& place)
               -> paddle::platform::DeviceContext* {
 #ifdef PADDLE_ONLY_CPU
-            PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
-
+             PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
 #else
-            return new paddle::platform::CUDADeviceContext(place);
+             return new paddle::platform::CUDADeviceContext(place);
 #endif
-          });
+           });

   py::class_<paddle::platform::GPUPlace>(m, "GPUPlace").def(py::init<int>());

From edb57292f0ce31cba94dbdc06a03d167943af7f3 Mon Sep 17 00:00:00 2001
From: qijun
Date: Tue, 1 Aug 2017 06:40:07 +0000
Subject: [PATCH 16/26] add cmake patch for gcc 4.9 and later versions

---
 cmake/flags.cmake | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/cmake/flags.cmake b/cmake/flags.cmake
index ef31c25203..d00a9bb3a3 100644
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@@ -9,6 +9,11 @@ function(CheckCompilerCXX11Flag)
         if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
             message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.")
         endif()
+        # TODO(qijun) gcc 4.9 and later versions raise SEGV due to an optimization problem.
+        # Use Debug mode instead for now.
+        if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9)
+            set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE)
+        endif()
     elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
         # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang"
         # Apple Clang is a different compiler than upstream Clang which havs different version numbers.

From f35e08471c827ea1967990bcce18d30a3ac54745 Mon Sep 17 00:00:00 2001
From: guosheng
Date: Tue, 1 Aug 2017 20:10:38 +0800
Subject: [PATCH 17/26] revise the format of __all__ in layers.py

---
 .../paddle/trainer_config_helpers/layers.py   | 125 ++++++++++++++----
 1 file changed, 98 insertions(+), 27 deletions(-)

diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 9a002f1e68..bd79bf66b0 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -31,33 +31,104 @@ except ImportError:
 import copy

 __all__ = [
-    'full_matrix_projection', 'AggregateLevel', 'ExpandLevel',
-    'identity_projection', 'dotmul_projection', 'dotmul_operator',
-    'repeat_layer', 'seq_reshape_layer', 'table_projection', 'mixed_layer',
-    'data_layer', 'embedding_layer', 'fc_layer', 'grumemory', 'pooling_layer',
-    'lstmemory', 'last_seq', 'first_seq', 'cos_sim', 'hsigmoid',
-    'conv_projection', 'mse_cost', 'regression_cost', 'classification_cost',
-    'LayerOutput', 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer',
-    'img_cmrnorm_layer', 'addto_layer', 'concat_layer', 'seq_concat_layer',
-    'lstm_step_layer', 'recurrent_group', 'memory', 'StaticInput',
-    'expand_layer', 'scaling_layer', 'scaling_projection', 'power_layer',
-    'interpolation_layer', 'bilinear_interp_layer', 'trans_layer',
-    'rotate_layer', 'sum_to_one_norm_layer', 'get_output_layer', 'LayerType',
-    'context_projection', 'beam_search', 'maxid_layer', 'GeneratedInput',
-    'SubsequenceInput', 'gru_step_layer', 'gru_step_naive_layer',
-    'recurrent_layer', 'BaseGeneratedInput', 'conv_operator',
-    'conv_shift_layer', 'tensor_layer', 'selective_fc_layer',
-    'sampling_id_layer', 'slope_intercept_layer',
-
'trans_full_matrix_projection', 'linear_comb_layer', 'convex_comb_layer', - 'ctc_layer', 'warp_ctc_layer', 'crf_layer', 'crf_decoding_layer', - 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', - 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', 'lambda_cost', - 'huber_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer', - 'printer_layer', 'print_layer', 'priorbox_layer', - 'cross_channel_norm_layer', 'multibox_loss_layer', 'detection_output_layer', - 'spp_layer', 'pad_layer', 'eos_layer', 'smooth_l1_cost', 'layer_support', - 'multiplex_layer', 'row_conv_layer', 'dropout_layer', 'prelu_layer', - 'gated_unit_layer', 'crop_layer', 'clip_layer' + 'full_matrix_projection', + 'AggregateLevel', + 'ExpandLevel', + 'identity_projection', + 'dotmul_projection', + 'dotmul_operator', + 'repeat_layer', + 'seq_reshape_layer', + 'table_projection', + 'mixed_layer', + 'data_layer', + 'embedding_layer', + 'fc_layer', + 'grumemory', + 'pooling_layer', + 'lstmemory', + 'last_seq', + 'first_seq', + 'cos_sim', + 'hsigmoid', + 'conv_projection', + 'mse_cost', + 'regression_cost', + 'classification_cost', + 'LayerOutput', + 'img_conv_layer', + 'img_pool_layer', + 'batch_norm_layer', + 'img_cmrnorm_layer', + 'addto_layer', + 'concat_layer', + 'seq_concat_layer', + 'lstm_step_layer', + 'recurrent_group', + 'memory', + 'StaticInput', + 'expand_layer', + 'scaling_layer', + 'scaling_projection', + 'power_layer', + 'interpolation_layer', + 'bilinear_interp_layer', + 'trans_layer', + 'rotate_layer', + 'sum_to_one_norm_layer', + 'get_output_layer', + 'LayerType', + 'context_projection', + 'beam_search', + 'maxid_layer', + 'GeneratedInput', + 'SubsequenceInput', + 'gru_step_layer', + 'gru_step_naive_layer', + 'recurrent_layer', + 'BaseGeneratedInput', + 'conv_operator', + 'conv_shift_layer', + 'tensor_layer', + 'selective_fc_layer', + 'sampling_id_layer', + 'slope_intercept_layer', + 'trans_full_matrix_projection', + 'linear_comb_layer', + 'convex_comb_layer', + 'ctc_layer', + 'warp_ctc_layer', + 'crf_layer', + 'crf_decoding_layer', + 'nce_layer', + 'cross_entropy_with_selfnorm', + 'cross_entropy', + 'multi_binary_label_cross_entropy', + 'sum_cost', + 'rank_cost', + 'lambda_cost', + 'huber_cost', + 'block_expand_layer', + 'maxout_layer', + 'out_prod_layer', + 'printer_layer', + 'print_layer', + 'priorbox_layer', + 'cross_channel_norm_layer', + 'multibox_loss_layer', + 'detection_output_layer', + 'spp_layer', + 'pad_layer', + 'eos_layer', + 'smooth_l1_cost', + 'layer_support', + 'multiplex_layer', + 'row_conv_layer', + 'dropout_layer', + 'prelu_layer', + 'gated_unit_layer', + 'crop_layer', + 'clip_layer', ] From d19355a53221e43ff93b036433cef101cfc30821 Mon Sep 17 00:00:00 2001 From: guosheng Date: Wed, 2 Aug 2017 15:50:50 +0800 Subject: [PATCH 18/26] Refine ClipLayer and add unit test for it --- doc/api/v2/config/layer.rst | 5 ++++ paddle/gserver/layers/ClipLayer.cpp | 29 ++++++++++--------- paddle/gserver/tests/test_LayerGrad.cpp | 4 +-- proto/ModelConfig.proto | 4 +-- python/paddle/trainer/config_parser.py | 15 ++++------ .../paddle/trainer_config_helpers/layers.py | 16 +++++----- .../tests/configs/file_list.sh | 2 +- .../tests/configs/test_clip_layer.py | 6 ++++ 8 files changed, 45 insertions(+), 36 deletions(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index daee55b7f9..d7eff17734 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -316,6 
+316,11 @@ scaling .. autoclass:: paddle.v2.layer.scaling :noindex: +clip +---- +.. autoclass:: paddle.v2.layer.clip + :noindex: + slope_intercept --------------- .. autoclass:: paddle.v2.layer.slope_intercept diff --git a/paddle/gserver/layers/ClipLayer.cpp b/paddle/gserver/layers/ClipLayer.cpp index 51f0e0d2f0..13f16c9537 100644 --- a/paddle/gserver/layers/ClipLayer.cpp +++ b/paddle/gserver/layers/ClipLayer.cpp @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" namespace paddle { @@ -26,8 +25,8 @@ namespace paddle { class ClipLayer : public Layer { protected: - real clipThresholdLow_; - real clipThresholdHigh_; + double min_; + double max_; public: explicit ClipLayer(const LayerConfig& config) : Layer(config) {} @@ -47,9 +46,9 @@ bool ClipLayer::init(const LayerMap& layerMap, CHECK_EQ(inputLayers_.size(), 1U); auto layerConf = config_.inputs(0).clip_conf(); - clipThresholdLow_ = layerConf.clip_threshold_low(); - clipThresholdHigh_ = layerConf.clip_threshold_high(); - CHECK_LT(clipThresholdLow_, clipThresholdHigh_); + min_ = layerConf.min(); + max_ = layerConf.max(); + CHECK_LT(min_, max_); return true; } @@ -60,19 +59,21 @@ void ClipLayer::forward(PassType passType) { resetOutput(inV->getHeight(), inV->getWidth()); MatrixPtr outV = getOutputValue(); outV->copyFrom(*inV); - outV->clip(clipThresholdLow_, clipThresholdHigh_); + outV->clip(min_, max_); } void ClipLayer::backward(const UpdateCallback& callback) { MatrixPtr inV = getInputValue(0); MatrixPtr inG = getInputGrad(0); - MatrixPtr outV = getOutputValue(); - MatrixPtr outG = getOutputGrad(); - MatrixPtr tmpMtx; - Matrix::resizeOrCreate( - tmpMtx, outG->getHeight(), outG->getWidth(), false, useGpu_); - tmpMtx->clipDerivative(*inV, clipThresholdLow_, clipThresholdHigh_); - inG->addDotMul(*outG, *tmpMtx, 1, 1); + if (inG) { + MatrixPtr outV = getOutputValue(); + MatrixPtr outG = getOutputGrad(); + MatrixPtr tmpMtx; + Matrix::resizeOrCreate( + tmpMtx, outG->getHeight(), outG->getWidth(), false, useGpu_); + tmpMtx->clipDerivative(*inV, min_, max_); + inG->addDotMul(*outG, *tmpMtx, 1, 1); + } } } // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index b0032adb39..f01bf3bc78 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1887,8 +1887,8 @@ TEST(Layer, ClipLayer) { config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); LayerInputConfig* input = config.layerConfig.add_inputs(); ClipConfig* layerConf = input->mutable_clip_conf(); - layerConf->set_clip_threshold_low(std::rand() / (real)RAND_MAX); - layerConf->set_clip_threshold_high(std::rand() / (real)RAND_MAX); + layerConf->set_min(std::rand() / (double)RAND_MAX); + layerConf->set_max(std::rand() / (double)RAND_MAX); for (auto useGpu : {false, true}) { testLayerGrad(config, "clip", batchSize, false, useGpu, false); } diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 772fc3c4ca..5ceb16a7b6 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -290,8 +290,8 @@ message DetectionOutputConfig { } message ClipConfig { - required float clip_threshold_low = 1; - required float clip_threshold_high = 2; + required double min = 1; + required double max = 2; } message LayerInputConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 9b2e9ea784..637f70f39c 100644 --- 
a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2171,19 +2171,16 @@ class RowConvLayer(LayerBase): @config_layer('clip') class ClipLayer(LayerBase): - def __init__(self, name, inputs, clip_threshold_low, clip_threshold_high): - super(ClipLayer, self).__init__(name, 'clip', 0, inputs=inputs) + def __init__(self, name, inputs, min, max, **xargs): + super(ClipLayer, self).__init__(name, 'clip', 0, inputs=inputs, **xargs) config_assert( len(self.inputs) == 1, - 'ClipLayer layer must have one and only one input.') - config_assert( - clip_threshold_low < clip_threshold_high, - 'clip_threshold_low must be less than clip_threshold_high.') + 'ClipLayer must have one and only one input.') + config_assert(min < max, 'min must be less than max.') input_layer = self.get_input_layer(0) self.set_layer_size(input_layer.size) - self.config.inputs[0].clip_conf.clip_threshold_low = clip_threshold_low - self.config.inputs[ - 0].clip_conf.clip_threshold_high = clip_threshold_high + self.config.inputs[0].clip_conf.min = min + self.config.inputs[0].clip_conf.max = max # key: cost type diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index bd79bf66b0..33a7fdb3da 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6011,7 +6011,7 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): @wrap_name_default("clip") -def clip_layer(input, clip_threshold_low, clip_threshold_high, name=None): +def clip_layer(input, min, max, name=None): """ A layer for clipping the input value by the threshold. @@ -6021,23 +6021,23 @@ def clip_layer(input, clip_threshold_low, clip_threshold_high, name=None): .. code-block:: python - clip = clip_layer(input=input_layer, clip_threshold_low=-10, clip_threshold_high=10) + clip = clip_layer(input=input_layer, min=-10, max=10) :param name: The Layer Name. :type name: basestring :param input: The input layer. :type input: LayerOutput. - :param clip_threshold_low: The lower threshold for clipping. - :type clip_threshold_low: float - :param clip_threshold_high: The upper threshold for clipping. - :type clip_threshold_high: float + :param min: The lower threshold for clipping. + :type min: double + :param max: The upper threshold for clipping. 
+    :type max: double
     :return: LayerOutput
     """
     Layer(
         name=name,
         type=LayerType.CLIP_LAYER,
         inputs=[input.name],
-        clip_threshold_low=clip_threshold_low,
-        clip_threshold_high=clip_threshold_high)
+        min=min,
+        max=max)
     return LayerOutput(
         name, LayerType.CLIP_LAYER, parents=[input], size=input.size)

diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index cdf9b2eab7..d71c41f77e 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -7,6 +7,6 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
 test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
 test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
 test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
-test_recursive_topology test_gated_unit_layer)
+test_recursive_topology test_gated_unit_layer test_clip_layer)

 export whole_configs=(test_split_datasource)

diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py
new file mode 100644
index 0000000000..f066fe1fb3
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py
@@ -0,0 +1,6 @@
+from paddle.trainer_config_helpers import *
+
+data = data_layer(name='input', size=300)
+clip = clip_layer(input=data, min=-10, max=10)
+
+outputs(clip)

From d1a691733b1be0a266b6b4bb9535ea1a9b729075 Mon Sep 17 00:00:00 2001
From: guosheng
Date: Wed, 2 Aug 2017 16:17:20 +0800
Subject: [PATCH 19/26] fix bug in ClipLayer test in test_LayerGrad

---
 paddle/gserver/tests/test_LayerGrad.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index c19bcc153d..c726e5a74e 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1907,8 +1907,10 @@ TEST(Layer, ClipLayer) {
   config.inputDefs.push_back({INPUT_DATA, "input", size, 0});
   LayerInputConfig* input = config.layerConfig.add_inputs();
   ClipConfig* layerConf = input->mutable_clip_conf();
-  layerConf->set_min(std::rand() / (double)RAND_MAX);
-  layerConf->set_max(std::rand() / (double)RAND_MAX);
+  double p1 = std::rand() / (double)RAND_MAX;
+  double p2 = std::rand() / (double)RAND_MAX;
+  layerConf->set_min(std::min(p1, p2));
+  layerConf->set_max(std::max(p1, p2));
   for (auto useGpu : {false, true}) {
     testLayerGrad(config, "clip", batchSize, false, useGpu, false);
   }

From 8f88972d530425694bf4050385fc0fda8a5764c9 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Wed, 2 Aug 2017 16:56:23 +0800
Subject: [PATCH 20/26] add wget to docker production image

---
 paddle/scripts/docker/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh
index 3860facb09..69ae0ea2d7 100644
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@@ -148,7 +148,7 @@ cat >> /paddle/build/Dockerfile <
Date: Wed, 2 Aug 2017 09:49:06 +0000
Subject: [PATCH 21/26] pass pre-commit

---
 paddle/pybind/pybind.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index e2c20ef883..d3cde07bd0 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -158,7 +158,7 @@ All parameter, weight, gradient are variables in Paddle.
          "The module will return special predefined variable name in Paddle")
       .def("empty", pd::OperatorBase::EMPTY_VAR_NAME)
       .def("temp", pd::OperatorBase::TMP_VAR_NAME);
-  //clang-format off
+  // clang-format off
   py::class_<paddle::platform::DeviceContext>(m, "DeviceContext")
       .def_static("create",
                   [](paddle::platform::CPUPlace& place)
                       -> paddle::platform::DeviceContext* {
@@ -174,7 +174,7 @@ All parameter, weight, gradient are variables in Paddle.
                     return new paddle::platform::CUDADeviceContext(place);
 #endif
                   });
-  //clang-format on
+  // clang-format on

   py::class_<paddle::platform::GPUPlace>(m, "GPUPlace").def(py::init<int>());

From 02655a229e148e31590165faeca2f6e553816732 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Wed, 2 Aug 2017 18:16:04 +0800
Subject: [PATCH 22/26] Return reference instead of pointer from GetEigenDevice

---
 paddle/framework/operator.cc      | 8 ++++----
 paddle/framework/operator.h       | 2 +-
 paddle/operators/add_op.h         | 3 +--
 paddle/operators/mean_op.h        | 2 +-
 paddle/operators/mul_op.h         | 2 +-
 paddle/operators/rowwise_add_op.h | 2 +-
 paddle/operators/sgd_op.h         | 2 +-
 paddle/operators/sigmoid_op.h     | 3 +--
 paddle/operators/softmax_op.h     | 4 ++--
 9 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index cfe9cba308..cb86e6be2b 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -20,16 +20,16 @@ namespace paddle {
 namespace framework {

 template <>
-Eigen::DefaultDevice* ExecutionContext::GetEigenDevice<
+Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
     platform::CPUPlace, Eigen::DefaultDevice>() const {
-  return device_context_.get_eigen_device<Eigen::DefaultDevice>();
+  return *device_context_.get_eigen_device<Eigen::DefaultDevice>();
 }

 #ifndef PADDLE_ONLY_CPU
 template <>
-Eigen::GpuDevice*
+Eigen::GpuDevice&
 ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
-  return device_context_.get_eigen_device<Eigen::GpuDevice>();
+  return *device_context_.get_eigen_device<Eigen::GpuDevice>();
 }
 #endif

diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index 0832a663dd..5543510348 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -253,7 +253,7 @@ class ExecutionContext : public OperatorContext {
   template <typename PlaceType,
             typename DeviceType =
                 typename platform::EigenDeviceConverter<PlaceType>::EigenDeviceType>
-  DeviceType* GetEigenDevice() const;
+  DeviceType& GetEigenDevice() const;

   platform::Place GetPlace() const { return device_context_.GetPlace(); }

diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h
index d2b649fcbd..a4ee407cae 100644
--- a/paddle/operators/add_op.h
+++ b/paddle/operators/add_op.h
@@ -28,8 +28,7 @@ public:

     output->mutable_data<T>(context.GetPlace());

-    EigenVector<T>::Flatten(*output).device(
-        *(context.GetEigenDevice<Place>())) =
+    EigenVector<T>::Flatten(*output).device(context.GetEigenDevice<Place>()) =
         framework::EigenVector<T>::Flatten(*input0) +
         framework::EigenVector<T>::Flatten(*input1);
   }

diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h
index 5f7d443751..20f2110529 100644
--- a/paddle/operators/mean_op.h
+++ b/paddle/operators/mean_op.h
@@ -27,7 +27,7 @@ public:

     output->mutable_data<T>(context.GetPlace());

-    EigenScalar<T>::From(*output).device(*(context.GetEigenDevice<Place>())) =
+    EigenScalar<T>::From(*output).device(context.GetEigenDevice<Place>()) =
         EigenVector<T>::Flatten(*input).mean();
   }
 };

diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h
index eef72ab293..1d0617ab8b 100644
--- a/paddle/operators/mul_op.h
+++ b/paddle/operators/mul_op.h
@@ -29,7 +29,7 @@ public:
     auto output = context.Output<Tensor>(0);
     output->mutable_data<T>(context.GetPlace());

-
EigenMatrix::From(*output).device(context.GetEigenDevice()) = EigenMatrix::From(*context.Input("X")) .contract(EigenMatrix::From(*context.Input("Y")), dim_pair); diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index b86dd54634..bd4d112895 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -33,7 +33,7 @@ public: const int rest_size = input.size() / bias_size; Eigen::DSizes one_d(input.size()); Eigen::DSizes bcast(rest_size); - output.reshape(one_d).device(*(context.GetEigenDevice())) = + output.reshape(one_d).device(context.GetEigenDevice()) = input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d); } }; diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h index af1dfdd756..d8ddbac573 100644 --- a/paddle/operators/sgd_op.h +++ b/paddle/operators/sgd_op.h @@ -29,7 +29,7 @@ public: param_out->mutable_data(ctx.GetPlace()); - EigenVector::Flatten(*param_out).device(*(ctx.GetEigenDevice())) = + EigenVector::Flatten(*param_out).device(ctx.GetEigenDevice()) = EigenVector::Flatten(*param) - lr * EigenVector::Flatten(*grad); } }; diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h index 3dd23a9ebc..f518ddcf3b 100644 --- a/paddle/operators/sigmoid_op.h +++ b/paddle/operators/sigmoid_op.h @@ -27,8 +27,7 @@ public: auto output = context.Output(0); output->mutable_data(context.GetPlace()); - EigenVector::Flatten(*output).device( - *(context.GetEigenDevice())) = + EigenVector::Flatten(*output).device(context.GetEigenDevice()) = 1.0 / (1.0 + (-1.0 * EigenVector::Flatten(*input)).exp()); } }; diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index a5c19c5fc7..75c5197697 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -46,9 +46,9 @@ public: .reshape(batch_by_one) .broadcast(one_by_class)); - softmax.device(*(context.GetEigenDevice())) = shifted_logits.exp(); + softmax.device(context.GetEigenDevice()) = shifted_logits.exp(); - softmax.device(*(context.GetEigenDevice())) = + softmax.device(context.GetEigenDevice()) = (softmax * softmax.sum(along_class) .inverse() From 17e16c2548e95c84bffc4bed5c8199b8836841fb Mon Sep 17 00:00:00 2001 From: guosheng Date: Wed, 2 Aug 2017 19:43:04 +0800 Subject: [PATCH 23/26] Refine RowL2NormLayer and add python unit test for it --- doc/api/v2/config/layer.rst | 5 ++++ paddle/gserver/layers/RowL2NormLayer.cpp | 25 +++++++++-------- python/paddle/trainer/config_parser.py | 8 +++--- .../tests/configs/file_list.sh | 2 +- .../protostr/test_row_l2_norm_layer.protostr | 27 +++++++++++++++++++ .../tests/configs/test_row_l2_norm_layer.py | 6 +++++ 6 files changed, 55 insertions(+), 18 deletions(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index daee55b7f9..9a317d416c 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -104,6 +104,11 @@ cross_channel_norm ------------------ .. autoclass:: paddle.v2.layer.cross_channel_norm :noindex: + +row_l2_norm +----------- +.. 
autoclass:: paddle.v2.layer.row_l2_norm
+   :noindex:

 Recurrent Layers
 ================

diff --git a/paddle/gserver/layers/RowL2NormLayer.cpp b/paddle/gserver/layers/RowL2NormLayer.cpp
index 1362c6ef12..0d609be43b 100644
--- a/paddle/gserver/layers/RowL2NormLayer.cpp
+++ b/paddle/gserver/layers/RowL2NormLayer.cpp
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "Layer.h"
-#include "paddle/math/Matrix.h"

 namespace paddle {

@@ -29,7 +28,7 @@ namespace paddle {
 class RowL2NormLayer : public Layer {
 protected:
   MatrixPtr inSquare_;
-  MatrixPtr reciSqrtRowSquareSum_;
+  MatrixPtr l2NormReciprocal_;
   MatrixPtr dotSum_;

 public:
@@ -67,11 +66,11 @@ void RowL2NormLayer::forward(PassType passType) {
   Matrix::resizeOrCreate(inSquare_, batchSize, dataDim, false, useGpu_);
   inV->square2(*inSquare_);

-  Matrix::resizeOrCreate(reciSqrtRowSquareSum_, batchSize, 1, false, useGpu_);
-  inSquare_->rowSum(*reciSqrtRowSquareSum_);
-  reciSqrtRowSquareSum_->sqrt2(*reciSqrtRowSquareSum_);
-  reciSqrtRowSquareSum_->scalarDiv(*reciSqrtRowSquareSum_, 1.0);
-  outV->rowScale(0, *inV, *reciSqrtRowSquareSum_);
+  Matrix::resizeOrCreate(l2NormReciprocal_, batchSize, 1, false, useGpu_);
+  inSquare_->rowSum(*l2NormReciprocal_);
+  l2NormReciprocal_->sqrt2(*l2NormReciprocal_);
+  l2NormReciprocal_->scalarDiv(*l2NormReciprocal_, 1.0);
+  outV->rowScale(0, *inV, *l2NormReciprocal_);
 }

 void RowL2NormLayer::backward(const UpdateCallback& callback) {
@@ -81,18 +80,18 @@ void RowL2NormLayer::backward(const UpdateCallback& callback) {
   MatrixPtr outG = getOutputGrad();
   size_t batchSize = inV->getHeight();

-  // inG[ij] += outG[ij] / reciSqrtRowSquareSum
-  // inG[ij] += -inV[ij] * reciSqrtRowSquareSum * reciSqrtRowSquareSum *
-  // DotMul(outG[i], inV[i])
+  // inG[ij] += outG[ij] * l2NormReciprocal
+  // inG[ij] += -inV[ij] * l2NormReciprocal * l2NormReciprocal * DotMul(outG[i],
+  // outV[i])
   if (inG) {
     Matrix::resizeOrCreate(dotSum_, batchSize, 1, false, useGpu_);
     dotSum_->zeroMem();
     dotSum_->rowDotMul(0, *outG, *outV);
-    dotSum_->dotMul(*dotSum_, *reciSqrtRowSquareSum_);
-    dotSum_->dotMul(*dotSum_, *reciSqrtRowSquareSum_);
+    dotSum_->dotMul(*dotSum_, *l2NormReciprocal_);
+    dotSum_->dotMul(*dotSum_, *l2NormReciprocal_);
     inSquare_->rowScale(0, *inV, *dotSum_);
     inG->sub(*inSquare_);
-    inG->addRowScale(0, *outG, *reciSqrtRowSquareSum_);
+    inG->addRowScale(0, *outG, *l2NormReciprocal_);
   }
 }

diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index c5e56e59de..3587ea1752 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2727,12 +2727,12 @@ class SumToOneNormLayer(LayerBase):

 @config_layer('row_l2_norm')
 class RowL2NormLayer(LayerBase):
-    def __init__(self, name, inputs, device=None):
+    def __init__(self, name, inputs, **xargs):
         super(RowL2NormLayer, self).__init__(
-            name, 'row_l2_norm', 0, inputs=inputs, device=device)
+            name, 'row_l2_norm', 0, inputs=inputs, **xargs)
         config_assert(len(self.inputs) == 1, 'RowL2NormLayer must have 1 input')
-        input_layer0 = self.get_input_layer(0)
-        self.set_layer_size(input_layer0.size)
+        input_layer = self.get_input_layer(0)
+        self.set_layer_size(input_layer.size)

 @config_layer('cos_vm')

diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index cdf9b2eab7..5b7ad22a13 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -7,6 +7,6 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
 test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
 test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
 test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
-test_recursive_topology test_gated_unit_layer)
+test_recursive_topology test_gated_unit_layer test_row_l2_norm_layer)

 export whole_configs=(test_split_datasource)

diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr
new file mode 100644
index 0000000000..c2786ff55c
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr
@@ -0,0 +1,27 @@
+type: "nn"
+layers {
+  name: "input"
+  type: "data"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "__row_l2_norm_layer_0__"
+  type: "row_l2_norm"
+  size: 300
+  active_type: ""
+  inputs {
+    input_layer_name: "input"
+  }
+}
+input_layer_names: "input"
+output_layer_names: "__row_l2_norm_layer_0__"
+sub_models {
+  name: "root"
+  layer_names: "input"
+  layer_names: "__row_l2_norm_layer_0__"
+  input_layer_names: "input"
+  output_layer_names: "__row_l2_norm_layer_0__"
+  is_recurrent_layer_group: false
+}
+

diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py
new file mode 100644
index 0000000000..ac8badb26a
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py
@@ -0,0 +1,6 @@
+from paddle.trainer_config_helpers import *
+
+data = data_layer(name='input', size=300)
+row_l2_norm = row_l2_norm_layer(input=data)
+
+outputs(row_l2_norm)

From 5d644994d348126b7dd96017a389a5cfe7c3e66f Mon Sep 17 00:00:00 2001
From: guosheng
Date: Wed, 2 Aug 2017 19:51:31 +0800
Subject: [PATCH 24/26] Add test_clip_layer.protostr for ClipLayer python unit
 test

---
 .../configs/protostr/test_clip_layer.protostr | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr

diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr
new file mode 100644
index 0000000000..4b9578a0c0
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr
@@ -0,0 +1,31 @@
+type: "nn"
+layers {
+  name: "input"
+  type: "data"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "__clip_0__"
+  type: "clip"
+  size: 300
+  active_type: ""
+  inputs {
+    input_layer_name: "input"
+    clip_conf {
+      min: -10
+      max: 10
+    }
+  }
+}
+input_layer_names: "input"
+output_layer_names: "__clip_0__"
+sub_models {
+  name: "root"
+  layer_names: "input"
+  layer_names: "__clip_0__"
+  input_layer_names: "input"
+  output_layer_names: "__clip_0__"
+  is_recurrent_layer_group: false
+}
+

From eed1a0dd542df8f86356132e2cd8b6ef7b830a15 Mon Sep 17 00:00:00 2001
From: Yi Wang
Date: Wed, 2 Aug 2017 14:56:24 -0700
Subject: [PATCH 25/26] Constrain GCC version to 4.8 in Dockerfile to be
 compatible with Eigen GPU code

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index
8cfb16928c..5dd9b0be4f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,7 +27,7 @@ RUN apt-get update && \ git python-pip python-dev openssh-server bison \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ - python-numpy python-matplotlib gcc g++ \ + python-numpy python-matplotlib gcc-4.8 g++-4.8 \ automake locales clang-format-3.8 swig doxygen cmake \ liblapack-dev liblapacke-dev libboost-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \ From cc6c33b8b221894a5b0b8b3533803c085bb70610 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Thu, 3 Aug 2017 06:36:44 +0800 Subject: [PATCH 26/26] export Backward to python (#3174) * export Backward to python --- paddle/operators/add_op.cc | 4 ---- paddle/pybind/CMakeLists.txt | 2 +- paddle/pybind/pybind.cc | 12 ++++++++++++ .../paddle/v2/framework/tests/test_add_two_op.py | 15 ++++++++++++++- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index 3a43dbfbad..85269a5f74 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -50,10 +50,6 @@ The equation is: Out = X + Y class AddOpGrad : public OperatorWithKernel { protected: void InferShape(const InferShapeContext &ctx) const override {} - std::string DebugString() const override { - LOG(INFO) << "AddOpGrad"; - return ""; - } }; } // namespace operators diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index 845589dcb1..ac12b504b5 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1,6 +1,6 @@ cc_library(paddle_pybind SHARED SRCS pybind.cc - DEPS pybind python + DEPS pybind python backward fc_op sgd_op add_op diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index d3cde07bd0..40ff164497 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include +#include "paddle/framework/backward.h" #include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" @@ -45,6 +46,10 @@ template void ExposeOperator(ClassType& m) { m.def("infer_shape", &ClassType::type::InferShape) .def("run", &ClassType::type::Run) + .def("type", + [](const typename ClassType::type& op) -> std::string { + return op.type_; + }) .def("outputs", [](const typename ClassType::type& op) -> std::vector { return op.outputs_; @@ -192,6 +197,13 @@ All parameter, weight, gradient are variables in Paddle. 
desc.InitializationErrorString()); return pd::OpRegistry::CreateOp(desc); }); + + operator_base.def("backward", + [](const pd::OperatorBase& forwardOp, + const std::unordered_set& no_grad_vars) { + return pd::Backward(forwardOp, no_grad_vars); + }); + ExposeOperator(operator_base); py::class_> net(m, "Net"); diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py b/python/paddle/v2/framework/tests/test_add_two_op.py index 73b3734909..6e6643201b 100644 --- a/python/paddle/v2/framework/tests/test_add_two_op.py +++ b/python/paddle/v2/framework/tests/test_add_two_op.py @@ -1,6 +1,10 @@ import unittest -from op_test_util import OpTestMeta + import numpy +import paddle.v2.framework.core as core +import paddle.v2.framework.create_op_creation_methods as creation + +from op_test_util import OpTestMeta class TestAddOp(unittest.TestCase): @@ -13,5 +17,14 @@ class TestAddOp(unittest.TestCase): self.Out = self.X + self.Y +class TestAddGradOp(unittest.TestCase): + def test_add_grad(self): + op = creation.op_creations.add_two(X="X", Y="Y", Out="Out") + backward_op = core.Operator.backward(op, set()) + self.assertEqual(backward_op.type(), "add_two_grad") + expected = '''Op(add_two_grad), inputs:(X, Y, Out, Out@GRAD), outputs:(X@GRAD, Y@GRAD).''' + self.assertEqual(expected, str(backward_op)) + + if __name__ == '__main__': unittest.main()
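
Usage note: the backward binding added in PATCH 26 can be driven directly from
Python, in the same way the new TestAddGradOp case above does. The snippet
below is a minimal sketch built only from that test; the operator, the
variable names, and the empty no_grad_vars set are all taken from it, and the
printed string is the one the test asserts, not a separately documented
guarantee.

    import paddle.v2.framework.core as core
    import paddle.v2.framework.create_op_creation_methods as creation

    # Forward op: Out = X + Y.
    op = creation.op_creations.add_two(X="X", Y="Y", Out="Out")

    # Derive the gradient op. An empty set excludes no variable from
    # gradient computation, so both X@GRAD and Y@GRAD are produced.
    backward_op = core.Operator.backward(op, set())

    print(backward_op.type())
    # add_two_grad
    print(str(backward_op))
    # Op(add_two_grad), inputs:(X, Y, Out, Out@GRAD), outputs:(X@GRAD, Y@GRAD).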