From ca9be82f6c45691b6661a4be54a05a369f579295 Mon Sep 17 00:00:00 2001 From: qijun Date: Wed, 30 Aug 2017 13:05:33 +0800 Subject: [PATCH 01/26] add how to use eigen cn doc --- doc/howto/dev/new_op_cn.md | 7 +- doc/howto/dev/use_eigen_cn.md | 140 ++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 doc/howto/dev/use_eigen_cn.md diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index ebd2cf3ff0..55c99fa7af 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -169,6 +169,8 @@ class MulKernel : public framework::OpKernel { `MulKernel`需要重写`Compute`接口,该接口参数为`const framework::ExecutionContext& context`, `ExecutionContext`相比`InferShapeContext`增加了设备类型,同样可获取到输入输出和属性参数,`Compute`函数里写具体实现时。 注意,不同设备(CPU、GPU)共享一个Op定义,是否则共享同一个`OpKernel`,取决于`Compute`调用的函数是否支持不同设备。`MulOp`的CPU、GPU实现共享同一个`Kernel`,`OpKernel`不共享的例子可以参考[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 + +为了使得`OpKernel`的计算过程书写较为简单,CPU、GPU的代码可以复用,我们通常借助Eigen unsupported Tensor模块来实现。关于在paddle中如何使用Eigen库,请参考对应的使用[文档](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/use_eigen_cn.md) 到此前向Op实现完成,需要在`.cc`文件中注册该op和kernel。反向Op类的定义和Kernel定义与前向Op类似,这里不再重复。但注意,反向Op没有`ProtoMaker`。 @@ -188,9 +190,12 @@ REGISTER_OP_CPU_KERNEL(mul_grad, - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。 - `REGISTER_OP_CPU_KERNEL` :注册`ops::MulKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::MulKernel`类。 -在 `.cu`文件中注册GPU Kernel。 +在 `.cu`文件中注册GPU Kernel。请注意,如果GPU Kernel的实现是基于Eigen unsupported模块,那么在 `.cu`的最前面请加上宏定义 `#define EIGEN_USE_GPU` ```c++ +// if use Eigen unsupported module before include head files +#define EIGEN_USE_GPU + namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); REGISTER_OP_GPU_KERNEL(mul_grad, diff --git a/doc/howto/dev/use_eigen_cn.md b/doc/howto/dev/use_eigen_cn.md new file mode 100644 index 0000000000..d22ff4799c --- /dev/null +++ b/doc/howto/dev/use_eigen_cn.md @@ -0,0 +1,140 @@ +## 在Paddle中如何使用Eigen + +神经网络本质上是一个计算图,计算需要的数据存放在`Tensor`中,而计算过程是由`Operartor`来描述的。在执行时,`Operator`调用对应`OpKernel`中的`Compute`接口,实现对`Tensor`的操作。 + + +### Eigen Tensor模块 + +Eigen Tensor模块对element-wise计算提供了强大的支持,并且书写一份代码,可以同时在CPU、GPU执行。但Eigen Tensor是一个正在开发中的模块,因此可能测试不够完备,文档较少。 + +关于Eigen Tensor模块的详细介绍请参考[文档](https://github.com/RLovelett/eigen/blob/master/unsupported/Eigen/CXX11/src/Tensor/README.md) + + +### paddle::framework::Tensor + +Paddle Tensor定义在framework目录下,其主要接口如下: + +``` +class Tensor { + public: + /*! Return a pointer to mutable memory block. */ + template + inline T* data(); + + /** + * @brief Return a pointer to mutable memory block. + * @note If not exist, then allocation. + */ + template + inline T* mutable_data(platform::Place place); + + /** + * @brief Return a pointer to mutable memory block. + * + * @param[in] dims The dimensions of the memory block. + * @param[in] place The place of the memory block. + * + * @note If not exist, then allocation. + */ + template + inline T* mutable_data(DDim dims, platform::Place place); + + /*! Resize the dimensions of the memory block. */ + inline Tensor& Resize(const DDim& dims); + + /*! Return the dimensions of the memory block. */ + inline const DDim& dims() const; + + private: + /*! holds the memory block if allocated. */ + std::shared_ptr holder_; + + /*! points to dimensions of memory block. 
*/ + DDim dim_; +}; +``` + +`Placeholder`的作用的延迟分配内存,即我们可以先定义一个Tensor,然后使用Resize接口设置Tensor的大小,最后再调用mutable_data接口分配实际的内存。 + +``` +paddle::framework::Tensor t; +paddle::platform::CPUPlace place; +// set size first +t.Resize({2, 3}); +// allocate memory on CPU later +t.mutable_data(place); +``` + +下面以AddOp为例说明Tensor的使用过程: + +- InferShape + +在运行神经网络计算图时,我们先调用每个`Operator`的`InferShape`接口,根据输入Tensor的大小来设置输出Tensor的大小,`Resize`接口会被调用。 + +``` +void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), + ctx.Input("Y")->dims(), + "Two input of Add Op's dimension must be same."); + ctx.Output("Out")->Resize(ctx.Input("X")->dims()); +} +``` + + +- Run + +`Operator`的`Run`接口最终会调用对应`OpKernel`的`Compute`接口,在这时真正的分配内存,`mutable_data`接口会被调用。 + +``` +void Compute(const framework::ExecutionContext& context) const override { + auto* input0 = context.Input("X"); + auto* input1 = context.Input("Y"); + auto* output = context.Output("Out"); + + output->mutable_data(context.GetPlace()); + + auto X = EigenVector::Flatten(*input0); + auto Y = EigenVector::Flatten(*input1); + auto Z = EigenVector::Flatten(*output); + + auto place = context.GetEigenDevice(); + + Z.device(place) = X + Y; +} +``` + + +### paddle::framework::Tensor到EigenTensor的转换 + +如上一小节所示,在具体的计算中,我们需要先把输入Tensor和输出Tensor转换为Eigen支持的格式。我们在[eigen.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/eigen.h)中提供了一些全局函数用来实现paddle::framework::Tensor到EigenTensor/EigenMatrix/EigenVector/EigenScalar的转换。 + +以EigenTensor为例,做一个介绍 + +``` +Tensor t; +float* p = t.mutable_data(make_ddim({1, 2, 3}), platform::CPUPlace()); +for (int i = 0; i < 1 * 2 * 3; i++) { + p[i] = static_cast(i); +} + +EigenTensor::Type et = EigenTensor::From(t); +``` + +From是EigenTensor模板struct提供的一个接口,可以实现从paddle::framework::Tensor到对EigenTensor的转换。由于Tensor的rank是模板参数,因此在转换时需要显示的指定。 + +需要额外注意的是,EigenVector::From方法是把paddle中的一维Tensor转为Eigen的一维Tensor,在这里用EigenVector来表示;而EigenVector::Flatten方法是把paddle中的一个Tensor进行reshape操作,压扁成为Eigen的一维Tensor,类型仍然为EigenVector。 + +更多的转换方法请参考eigen_test.cc中的[单元测试](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/eigen_test.cc)。 + + + +### 实现计算 + +当需要完成计算时,我们需要等式左边的EigenTensor调用device接口: + +``` +auto place = context.GetEigenDevice(); +Z.device(place) = X + Y; +``` + +由于Eigen Tensor模块的文档较少,我们可以参考TensorFlow的[kernels](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/kernels)模块下的相关`OpKernel`的计算代码。 From 1dfc5d87ff4b4f40272e387d598a1bec5477d127 Mon Sep 17 00:00:00 2001 From: qijun Date: Wed, 30 Aug 2017 17:06:16 +0800 Subject: [PATCH 02/26] add more details --- doc/howto/dev/use_eigen_cn.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/howto/dev/use_eigen_cn.md b/doc/howto/dev/use_eigen_cn.md index d22ff4799c..49a726959a 100644 --- a/doc/howto/dev/use_eigen_cn.md +++ b/doc/howto/dev/use_eigen_cn.md @@ -120,7 +120,7 @@ for (int i = 0; i < 1 * 2 * 3; i++) { EigenTensor::Type et = EigenTensor::From(t); ``` -From是EigenTensor模板struct提供的一个接口,可以实现从paddle::framework::Tensor到对EigenTensor的转换。由于Tensor的rank是模板参数,因此在转换时需要显示的指定。 +From是EigenTensor模板提供的一个接口,可以实现从paddle::framework::Tensor到对EigenTensor的转换。由于Tensor的rank是模板参数,因此在转换时需要显示的指定。 需要额外注意的是,EigenVector::From方法是把paddle中的一维Tensor转为Eigen的一维Tensor,在这里用EigenVector来表示;而EigenVector::Flatten方法是把paddle中的一个Tensor进行reshape操作,压扁成为Eigen的一维Tensor,类型仍然为EigenVector。 @@ -130,11 +130,16 @@ From是EigenTensor模板struct提供的一个接口,可以实现从paddle::fra ### 实现计算 -当需要完成计算时,我们需要等式左边的EigenTensor调用device接口: 
+当需要完成计算时,我们需要等式左边的EigenTensor调用device接口。在这里需要注意的是,这里的EigenTensor之间的运算只是改变了原有Tensor中的数据,而不会改变原有Tensor的shape信息。 ``` +auto X = EigenVector::Flatten(*input0); +auto Y = EigenVector::Flatten(*input1); +auto Z = EigenVector::Flatten(*output); auto place = context.GetEigenDevice(); Z.device(place) = X + Y; ``` +在这段代码中,input0/input1/output可以是任意维度的Tensor。我们调用了EigenVector的Flatten接口,把任意维度的Tensor转为了一维的EigenVector。而在计算结束之后,input0/input1/output的原有shape信息不变。如果想改变原有Tensor的shape信息,可以调用Resize接口进行改变。 + 由于Eigen Tensor模块的文档较少,我们可以参考TensorFlow的[kernels](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/kernels)模块下的相关`OpKernel`的计算代码。 From ed72af48cecb1462b14c2105e9451052fa583cde Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Fri, 1 Sep 2017 19:01:12 +0800 Subject: [PATCH 03/26] Add cos_sim op. --- paddle/operators/CMakeLists.txt | 2 +- paddle/operators/cos_sim_op.cc | 91 ++++++++++++++++++ paddle/operators/cos_sim_op.cu | 22 +++++ paddle/operators/cos_sim_op.h | 93 +++++++++++++++++++ paddle/pybind/pybind.cc | 1 + .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/gradient_checker.py | 21 +++-- .../paddle/v2/framework/tests/op_test_util.py | 8 +- .../v2/framework/tests/test_cos_sim_op.py | 40 ++++++++ 9 files changed, 267 insertions(+), 12 deletions(-) create mode 100644 paddle/operators/cos_sim_op.cc create mode 100644 paddle/operators/cos_sim_op.cu create mode 100644 paddle/operators/cos_sim_op.h create mode 100644 python/paddle/v2/framework/tests/test_cos_sim_op.py diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index e5efcccb0e..25dbd236e6 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -56,7 +56,7 @@ list(REMOVE_ITEM GENERAL_OPS op_library(net_op SRCS net_op.cc) op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) -op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc +op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor operator net_op) op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) diff --git a/paddle/operators/cos_sim_op.cc b/paddle/operators/cos_sim_op.cc new file mode 100644 index 0000000000..21a616522b --- /dev/null +++ b/paddle/operators/cos_sim_op.cc @@ -0,0 +1,91 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/operators/cos_sim_op.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class CosSimOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null."); + PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), + ctx.Input("Y")->dims(), + "Dimensions of Input(X) and Input(Y) must be the same."); + + auto dims = ctx.Input("X")->dims(); + ctx.Output("Out")->Resize({dims[0], 1}); + } +}; + +class CosSimOpMaker : public framework::OpProtoAndCheckerMaker { + public: + CosSimOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The first input of cos_sim op."); + AddInput("Y", "The second input of cos_sim op."); + AddOutput("Out", "The output of cos_sim op."); + AddComment(R"DOC( +Cosine Similarity Operator. + +The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)) +)DOC"); + } +}; + +class CosSimOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), + "Input(Out@GRAD) should not be null."); + + auto x_dims = ctx.Input("X")->dims(); + auto y_dims = ctx.Input("Y")->dims(); + auto out_dims = ctx.Input(framework::GradVarName("Out"))->dims(); + PADDLE_ENFORCE_EQ(x_dims, y_dims, + "Dimensions of Input(X) and Input(Y) must be the same."); + PADDLE_ENFORCE_EQ(out_dims[0], x_dims[0], + "1st dimension of Out@GRAD must equal to Input(X)"); + PADDLE_ENFORCE_EQ(out_dims[1], 1, + "1st dimension of Out@GRAD must equal to Input(X)"); + + auto *x_grad = ctx.Output(framework::GradVarName("X")); + auto *y_grad = ctx.Output(framework::GradVarName("Y")); + x_grad->Resize(x_dims); + y_grad->Resize(y_dims); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(cos_sim, ops::CosSimOp, ops::CosSimOpMaker, cos_sim_grad, + ops::CosSimOpGrad); +REGISTER_OP_CPU_KERNEL(cos_sim, + ops::CosSimKernel); +REGISTER_OP_CPU_KERNEL( + cos_sim_grad, ops::CosSimGradKernel); diff --git a/paddle/operators/cos_sim_op.cu b/paddle/operators/cos_sim_op.cu new file mode 100644 index 0000000000..0cb8fd26de --- /dev/null +++ b/paddle/operators/cos_sim_op.cu @@ -0,0 +1,22 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#define EIGEN_USE_GPU +#include "paddle/operators/cos_sim_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(cos_sim, + ops::CosSimKernel); +REGISTER_OP_GPU_KERNEL( + cos_sim_grad, ops::CosSimGradKernel); diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h new file mode 100644 index 0000000000..5247087cc1 --- /dev/null +++ b/paddle/operators/cos_sim_op.h @@ -0,0 +1,93 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenMatrix = framework::EigenMatrix; + +template +class CosSimKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* x = context.Input("X"); + auto* y = context.Input("Y"); + auto* z = context.Output("Out"); + + z->mutable_data(context.GetPlace()); + + auto dims = x->dims(); + int size = static_cast(framework::product(dims)); + auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); + auto X = EigenMatrix::From(*x, new_dims); + auto Y = EigenMatrix::From(*y, new_dims); + auto Z = EigenMatrix::From(*z, new_dims); + + auto XY = (X * Y).sum(Eigen::array({1})); + auto XX = (X * X).sum(Eigen::array({1})); + auto YY = (Y * Y).sum(Eigen::array({1})); + auto place = context.GetEigenDevice(); + Z.device(place) = XY / XX.sqrt() / YY.sqrt(); + } +}; + +template +class CosSimGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* x = context.Input("X"); + auto* y = context.Input("Y"); + auto* z = context.Input("Out"); + auto* grad_x = context.Output(framework::GradVarName("X")); + auto* grad_y = context.Output(framework::GradVarName("Y")); + auto* grad_z = context.Input(framework::GradVarName("Out")); + + grad_x->mutable_data(context.GetPlace()); + grad_y->mutable_data(context.GetPlace()); + + auto dims = x->dims(); + int size = static_cast(framework::product(dims)); + auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); + auto X = EigenMatrix::From(*x, new_dims); + auto Y = EigenMatrix::From(*y, new_dims); + auto Z = EigenMatrix::From(*z); + auto dX = EigenMatrix::From(*grad_x, new_dims); + auto dY = EigenMatrix::From(*grad_y, new_dims); + auto dZ = EigenMatrix::From(*grad_z); + + auto XX = (X * X).sum(Eigen::array({1})); + auto YY = (Y * Y).sum(Eigen::array({1})); + Eigen::DSizes bcast(1, dims[1]); + auto denominator_bcast = (XX.sqrt() * YY.sqrt()).broadcast(bcast); + auto Z_bcast = Z.broadcast(bcast); + auto dZ_bcast = dZ.broadcast(bcast); + auto place = context.GetEigenDevice(); + dX.device(place) = + dZ_bcast * (Y / denominator_bcast - Z_bcast * X / XX.broadcast(bcast)); + dY.device(place) = + dZ_bcast * (X / denominator_bcast - Z_bcast * Y / YY.broadcast(bcast)); + // dX.device(place) = X; + // Y.device(place) = Y; + } 
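+  // Row-wise gradient of cosine similarity computed above:
+  //   dX = dOut * (Y / (|X||Y|) - Out * X / |X|^2), and dY symmetrically with X and Y swapped.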
+}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 3bc150ccb7..a8ec35dfad 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -46,6 +46,7 @@ USE_OP(lookup_table); USE_OP(scale); USE_OP_ITSELF(identity); USE_OP(minus); +USE_OP(cos_sim); USE_CPU_ONLY_OP(gather); USE_CPU_ONLY_OP(scatter); diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 661ebd8964..e0f77d7973 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -4,6 +4,7 @@ py_test(test_scope SRCS test_scope.py) py_test(test_tensor SRCS test_tensor.py) py_test(test_mul_op SRCS test_mul_op.py) +py_test(test_cos_sim_op SRCS test_cos_sim_op.py) py_test(test_mean_op SRCS test_mean_op.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 518f828bac..bf01ea4876 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -36,13 +36,13 @@ def get_numeric_gradient(op, in_place=False): """ Get Numeric Gradient for an operator's input. - - :param op: C++ operator instance, could be an network - :param input_values: The input variables. Should be an dictionary, key is + + :param op: C++ operator instance, could be an network + :param input_values: The input variables. Should be an dictionary, key is variable name. Value is numpy array. - :param output_name: The final output variable name. + :param output_name: The final output variable name. :param input_to_check: The input variable need to get gradient. - :param delta: The perturbation value for numeric gradient method. The + :param delta: The perturbation value for numeric gradient method. The smaller delta is, the more accurate result will get. But if that delta is too small, it could occur numerical stability problem. :param local_scope: The local scope used for get_numeric_gradient. @@ -229,9 +229,9 @@ class GradientChecker(unittest.TestCase): """Use relative error for the comparison. :param numeric_grads: the numerical graidents. - :type numeric_grads: a list of numpy.array + :type numeric_grads: a list of numpy.array :param analytic_grads: the analytical graidents. - :type analytic_grads: a list of numpy.array + :type analytic_grads: a list of numpy.array :param name: the names of gradients, used to print for debug. :type names: a list of string :param msg_prefix: string info, used to print for debug. 
@@ -304,6 +304,13 @@ class GradientChecker(unittest.TestCase): # get analytical gradients according to different device analytic_grads = self.__get_gradient(forward_op, backward_op, input_vars, check_names, place) + #print(numeric_grads[0], numeric_grads[0].shape) + print("dim0: ", numeric_grads[0], numeric_grads[0].shape) + print("dim0: ", analytic_grads[0], analytic_grads[0].shape) + print("---------------------") + print("dim1: ", numeric_grads[1], numeric_grads[1].shape) + print("dim1: ", analytic_grads[1], analytic_grads[1].shape) + assert False self.__assert_is_close(numeric_grads, analytic_grads, check_names, max_relative_error, "Gradient Check On %s" % str(place)) diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index 3bc05a0fec..a4899355b5 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -6,13 +6,13 @@ from paddle.v2.framework.op import Operator class OpTestMeta(type): """ Operator Test ClassMeta. - - It injects `test_all` method into user's OperatorTest class, to make Python + + It injects `test_all` method into user's OperatorTest class, to make Python unittest module run that method. - + The `test_all` read what value is stored in `self`. It use self's values to create and run a operator, and check whether that op is OK or not. - + See `test_add_two_op` for example usage. """ diff --git a/python/paddle/v2/framework/tests/test_cos_sim_op.py b/python/paddle/v2/framework/tests/test_cos_sim_op.py new file mode 100644 index 0000000000..f3b04d25f2 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_cos_sim_op.py @@ -0,0 +1,40 @@ +import unittest +import numpy as np +from gradient_checker import GradientChecker, create_op +from op_test_util import OpTestMeta + + +class TestCosSimOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "cos_sim" + self.inputs = { + 'X': np.random.random((32, 84)).astype("float32"), + 'Y': np.random.random((32, 84)).astype("float32") + } + expect = (self.inputs['X'] * self.inputs['Y']).sum(axis=1) / \ + np.linalg.norm(self.inputs['X'], axis=1) / \ + np.linalg.norm(self.inputs['Y'], axis=1) + expect = np.expand_dims(expect, 1) + self.outputs = {'Out': expect} + + +class CosSimGradOpTest(GradientChecker): + def test_cos_sim(self): + op = create_op("cos_sim") + #inputs = { + #'X': np.random.random((2, 2)).astype("float32"), + #'Y': np.random.random((2, 2)).astype("float32") + #} + inputs = { + 'X': np.array([[0.9, 0.6], [1.9, 1.6]]).astype("float32"), + 'Y': np.array([[0.7, 0.8], [1.7, 1.8]]).astype("float32") + } + print(inputs) + self.check_grad( + op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.5) + + +if __name__ == '__main__': + unittest.main() From 9d2909be2abcb9b8728f7c4de7437c07a1254b8b Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 1 Sep 2017 19:27:52 +0800 Subject: [PATCH 04/26] rename add_op to add_two_op --- paddle/framework/CMakeLists.txt | 2 +- paddle/operators/{add_op.cc => add_two_op.cc} | 2 +- paddle/operators/{add_op.cu => add_two_op.cu} | 2 +- paddle/operators/{add_op.h => add_two_op.h} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename paddle/operators/{add_op.cc => add_two_op.cc} (98%) rename paddle/operators/{add_op.cu => add_two_op.cu} (95%) rename paddle/operators/{add_op.h => add_two_op.h} (100%) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index c0838d9b75..e138517b6b 100644 --- 
a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -25,7 +25,7 @@ cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator) cc_library(op_registry SRCS op_registry.cc DEPS grad_op_builder) cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) -cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_op) +cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_two_op) py_proto_compile(framework_py_proto SRCS framework.proto) # Generate an empty __init__.py to make framework_py_proto as a valid python module. diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_two_op.cc similarity index 98% rename from paddle/operators/add_op.cc rename to paddle/operators/add_two_op.cc index 8ab748ed71..bc99e306e0 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_two_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/add_op.h" +#include "paddle/operators/add_two_op.h" namespace paddle { namespace operators { diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_two_op.cu similarity index 95% rename from paddle/operators/add_op.cu rename to paddle/operators/add_two_op.cu index cec5f558cb..acc03b2c8b 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_two_op.cu @@ -14,7 +14,7 @@ #define EIGEN_USE_GPU #include "paddle/framework/op_registry.h" -#include "paddle/operators/add_op.h" +#include "paddle/operators/add_two_op.h" namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(add_two, diff --git a/paddle/operators/add_op.h b/paddle/operators/add_two_op.h similarity index 100% rename from paddle/operators/add_op.h rename to paddle/operators/add_two_op.h From 91215bcef907baffdf52bb7894a0a33d0253c16f Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Sat, 2 Sep 2017 17:25:30 +0800 Subject: [PATCH 05/26] Fix a bug causing wrong gradient results in cos_sim op. 
--- paddle/operators/cos_sim_op.cc | 32 ++++++++++---- paddle/operators/cos_sim_op.h | 34 +++++++++------ .../v2/framework/tests/gradient_checker.py | 7 ---- .../v2/framework/tests/test_cos_sim_op.py | 42 ++++++++++++------- 4 files changed, 71 insertions(+), 44 deletions(-) diff --git a/paddle/operators/cos_sim_op.cc b/paddle/operators/cos_sim_op.cc index 21a616522b..3760d0b161 100644 --- a/paddle/operators/cos_sim_op.cc +++ b/paddle/operators/cos_sim_op.cc @@ -25,14 +25,16 @@ class CosSimOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) must not be null."); PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), ctx.Input("Y")->dims(), "Dimensions of Input(X) and Input(Y) must be the same."); auto dims = ctx.Input("X")->dims(); ctx.Output("Out")->Resize({dims[0], 1}); + ctx.Output("XNorm")->Resize({dims[0], 1}); + ctx.Output("YNorm")->Resize({dims[0], 1}); } }; @@ -43,6 +45,9 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "The first input of cos_sim op."); AddInput("Y", "The second input of cos_sim op."); AddOutput("Out", "The output of cos_sim op."); + AddOutput("XNorm", "Row norm of the first input.").AsIntermediate(); + AddOutput("YNorm", "Row norm of the second input.").AsIntermediate(); + AddComment(R"DOC( Cosine Similarity Operator. @@ -57,20 +62,31 @@ class CosSimOpGrad : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("XNorm"), + "Input(XNorm) must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("YNorm"), + "Input(YNorm) must not be null."); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), - "Input(Out@GRAD) should not be null."); + "Input(Out@GRAD) must not be null."); auto x_dims = ctx.Input("X")->dims(); auto y_dims = ctx.Input("Y")->dims(); + auto xnorm_dims = ctx.Input("XNorm")->dims(); + auto ynorm_dims = ctx.Input("YNorm")->dims(); auto out_dims = ctx.Input(framework::GradVarName("Out"))->dims(); PADDLE_ENFORCE_EQ(x_dims, y_dims, "Dimensions of Input(X) and Input(Y) must be the same."); + PADDLE_ENFORCE_EQ(xnorm_dims[0], x_dims[0], + "1st dimension of XNorm must equal that of Input(X)."); + PADDLE_ENFORCE_EQ(xnorm_dims[1], 1, "2st dimension of XNorm must be one."); + PADDLE_ENFORCE_EQ(ynorm_dims[0], y_dims[0], + "1st dimension of YNorm must equal that of Input(Y)."); + PADDLE_ENFORCE_EQ(ynorm_dims[1], 1, "2st dimension of YNorm must be one."); PADDLE_ENFORCE_EQ(out_dims[0], x_dims[0], - "1st dimension of Out@GRAD must equal to Input(X)"); - PADDLE_ENFORCE_EQ(out_dims[1], 1, - "1st dimension of Out@GRAD must equal to Input(X)"); + "1st dimension of Out@GRAD must equal that of Input(X)"); + PADDLE_ENFORCE_EQ(out_dims[1], 1, "1st dimension of Out@GRAD must be one."); auto *x_grad = ctx.Output(framework::GradVarName("X")); auto *y_grad = 
ctx.Output(framework::GradVarName("Y")); diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index 5247087cc1..69d35d8bc2 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -31,21 +31,27 @@ class CosSimKernel : public framework::OpKernel { auto* x = context.Input("X"); auto* y = context.Input("Y"); auto* z = context.Output("Out"); + auto* x_norm = context.Output("XNorm"); + auto* y_norm = context.Output("YNorm"); z->mutable_data(context.GetPlace()); + x_norm->mutable_data(context.GetPlace()); + y_norm->mutable_data(context.GetPlace()); auto dims = x->dims(); int size = static_cast(framework::product(dims)); auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); auto X = EigenMatrix::From(*x, new_dims); auto Y = EigenMatrix::From(*y, new_dims); - auto Z = EigenMatrix::From(*z, new_dims); + auto Z = EigenMatrix::From(*z); + auto XNorm = EigenMatrix::From(*x_norm); + auto YNorm = EigenMatrix::From(*y_norm); - auto XY = (X * Y).sum(Eigen::array({1})); - auto XX = (X * X).sum(Eigen::array({1})); - auto YY = (Y * Y).sum(Eigen::array({1})); auto place = context.GetEigenDevice(); - Z.device(place) = XY / XX.sqrt() / YY.sqrt(); + auto XY = (X * Y).sum(Eigen::array({1})); + XNorm.device(place) = (X * X).sum(Eigen::array({1})).sqrt(); + YNorm.device(place) = (Y * Y).sum(Eigen::array({1})).sqrt(); + Z.device(place) = XY / XNorm / YNorm; } }; @@ -56,6 +62,8 @@ class CosSimGradKernel : public framework::OpKernel { auto* x = context.Input("X"); auto* y = context.Input("Y"); auto* z = context.Input("Out"); + auto* x_norm = context.Input("XNorm"); + auto* y_norm = context.Input("YNorm"); auto* grad_x = context.Output(framework::GradVarName("X")); auto* grad_y = context.Output(framework::GradVarName("Y")); auto* grad_z = context.Input(framework::GradVarName("Out")); @@ -69,23 +77,23 @@ class CosSimGradKernel : public framework::OpKernel { auto X = EigenMatrix::From(*x, new_dims); auto Y = EigenMatrix::From(*y, new_dims); auto Z = EigenMatrix::From(*z); + auto X_norm = EigenMatrix::From(*x_norm); + auto Y_norm = EigenMatrix::From(*y_norm); auto dX = EigenMatrix::From(*grad_x, new_dims); auto dY = EigenMatrix::From(*grad_y, new_dims); auto dZ = EigenMatrix::From(*grad_z); - auto XX = (X * X).sum(Eigen::array({1})); - auto YY = (Y * Y).sum(Eigen::array({1})); - Eigen::DSizes bcast(1, dims[1]); - auto denominator_bcast = (XX.sqrt() * YY.sqrt()).broadcast(bcast); + Eigen::DSizes bcast(1, new_dims[1]); auto Z_bcast = Z.broadcast(bcast); auto dZ_bcast = dZ.broadcast(bcast); auto place = context.GetEigenDevice(); + auto X_snorm_bcast = X_norm.square().eval().broadcast(bcast); + auto Y_snorm_bcast = Y_norm.square().eval().broadcast(bcast); + auto norm_prod_bcast = (X_norm * Y_norm).eval().broadcast(bcast); dX.device(place) = - dZ_bcast * (Y / denominator_bcast - Z_bcast * X / XX.broadcast(bcast)); + dZ_bcast * (Y / norm_prod_bcast - Z_bcast * X / X_snorm_bcast); dY.device(place) = - dZ_bcast * (X / denominator_bcast - Z_bcast * Y / YY.broadcast(bcast)); - // dX.device(place) = X; - // Y.device(place) = Y; + dZ_bcast * (X / norm_prod_bcast - Z_bcast * Y / Y_snorm_bcast); } }; diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index bf01ea4876..409b3caf33 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -304,13 +304,6 @@ class GradientChecker(unittest.TestCase): # get analytical gradients according to 
different device analytic_grads = self.__get_gradient(forward_op, backward_op, input_vars, check_names, place) - #print(numeric_grads[0], numeric_grads[0].shape) - print("dim0: ", numeric_grads[0], numeric_grads[0].shape) - print("dim0: ", analytic_grads[0], analytic_grads[0].shape) - print("---------------------") - print("dim1: ", numeric_grads[1], numeric_grads[1].shape) - print("dim1: ", analytic_grads[1], analytic_grads[1].shape) - assert False self.__assert_is_close(numeric_grads, analytic_grads, check_names, max_relative_error, "Gradient Check On %s" % str(place)) diff --git a/python/paddle/v2/framework/tests/test_cos_sim_op.py b/python/paddle/v2/framework/tests/test_cos_sim_op.py index f3b04d25f2..a19be47f76 100644 --- a/python/paddle/v2/framework/tests/test_cos_sim_op.py +++ b/python/paddle/v2/framework/tests/test_cos_sim_op.py @@ -10,30 +10,40 @@ class TestCosSimOp(unittest.TestCase): def setUp(self): self.type = "cos_sim" self.inputs = { - 'X': np.random.random((32, 84)).astype("float32"), - 'Y': np.random.random((32, 84)).astype("float32") + 'X': np.random.random((32, 64)).astype("float32"), + 'Y': np.random.random((32, 64)).astype("float32") + } + expect_x_norm = np.linalg.norm(self.inputs['X'], axis=1) + expect_y_norm = np.linalg.norm(self.inputs['Y'], axis=1) + expect_out = (self.inputs['X'] * self.inputs['Y']).sum(axis=1) / \ + expect_x_norm / expect_y_norm + self.outputs = { + 'XNorm': np.expand_dims(expect_x_norm, 1), + 'YNorm': np.expand_dims(expect_y_norm, 1), + 'Out': np.expand_dims(expect_out, 1) } - expect = (self.inputs['X'] * self.inputs['Y']).sum(axis=1) / \ - np.linalg.norm(self.inputs['X'], axis=1) / \ - np.linalg.norm(self.inputs['Y'], axis=1) - expect = np.expand_dims(expect, 1) - self.outputs = {'Out': expect} class CosSimGradOpTest(GradientChecker): - def test_cos_sim(self): + def test_cos_sim_2d(self): + op = create_op("cos_sim") + inputs = { + 'X': np.random.random((10, 5)).astype("float32"), + 'Y': np.random.random((10, 5)).astype("float32") + } + self.compare_grad(op, inputs) + self.check_grad( + op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.05) + + def test_cos_sim_3d(self): op = create_op("cos_sim") - #inputs = { - #'X': np.random.random((2, 2)).astype("float32"), - #'Y': np.random.random((2, 2)).astype("float32") - #} inputs = { - 'X': np.array([[0.9, 0.6], [1.9, 1.6]]).astype("float32"), - 'Y': np.array([[0.7, 0.8], [1.7, 1.8]]).astype("float32") + 'X': np.random.random((10, 5, 2)).astype("float32"), + 'Y': np.random.random((10, 5, 2)).astype("float32") } - print(inputs) + self.compare_grad(op, inputs) self.check_grad( - op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.5) + op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.05) if __name__ == '__main__': From 879866cc565ed5e2fe76e92bcb2268c6916f7ffd Mon Sep 17 00:00:00 2001 From: Zhuo Zhang Date: Sun, 3 Sep 2017 15:57:14 +0800 Subject: [PATCH 06/26] add PyDataProvider2.InputType pretty print function --- python/paddle/trainer/PyDataProvider2.py | 35 ++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/python/paddle/trainer/PyDataProvider2.py b/python/paddle/trainer/PyDataProvider2.py index 7e305e2cd9..033c71cf8f 100644 --- a/python/paddle/trainer/PyDataProvider2.py +++ b/python/paddle/trainer/PyDataProvider2.py @@ -27,6 +27,14 @@ class SequenceType(object): SEQUENCE = 1 SUB_SEQUENCE = 2 + @classmethod + def tostring(cls, value): + for k in cls.__dict__: + if not k.startswith('__'): + if getattr(cls, k) == value: + return cls.__name__ + '.' 
+ k + return 'INVALID(' + str(value) + ')' + # TODO(yuyang18): Add string data type here. class DataType(object): @@ -35,6 +43,14 @@ class DataType(object): SparseValue = 2 Index = 3 + @classmethod + def tostring(cls, value): + for k in cls.__dict__: + if not k.startswith('__'): + if getattr(cls, k) == value: + return cls.__name__ + '.' + k + return 'INVALID(' + str(value) + ')' + class CacheType(object): NO_CACHE = 0 # No cache at all @@ -69,6 +85,25 @@ class InputType(object): self.seq_type = seq_type self.type = tp + def __repr__(self): + """ + Return a human readable representation like 'InputType(dim=25921, seq_type=SequenceType.NO_SEQUENCE, type=DataType.Dense)' + """ + repr_str = type(self).__name__ + repr_str += '(' + serialize_func_map = { + 'dim': repr, + 'seq_type': SequenceType.tostring, + 'type': DataType.tostring + } + for idx, k in enumerate(self.__slots__): + if idx != 0: + repr_str += ', ' + repr_str += ( + k + '=' + serialize_func_map.get(k, repr)(getattr(self, k))) + repr_str += ')' + return repr_str + def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE): """ From a60128aeb235c3e53b6785537bf3f67502ca079d Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 4 Sep 2017 14:11:43 +0800 Subject: [PATCH 07/26] move identity_op from scale_op.cc to be a single file --- paddle/operators/CMakeLists.txt | 2 ++ paddle/operators/identity_op.cc | 54 +++++++++++++++++++++++++++++++++ paddle/operators/scale_op.cc | 31 +------------------ 3 files changed, 57 insertions(+), 30 deletions(-) create mode 100644 paddle/operators/identity_op.cc diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index e5efcccb0e..d85c3d575a 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -48,12 +48,14 @@ add_subdirectory(math) list(REMOVE_ITEM GENERAL_OPS net_op + identity_op minus_op mul_op recurrent_op scale_op) op_library(net_op SRCS net_op.cc) +op_library(identity_op SRCS identity_op.cc DEPS scale_op) op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc diff --git a/paddle/operators/identity_op.cc b/paddle/operators/identity_op.cc new file mode 100644 index 0000000000..be956bf3b3 --- /dev/null +++ b/paddle/operators/identity_op.cc @@ -0,0 +1,54 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/net_op.h" +#include "paddle/operators/scale_op.h" + +namespace paddle { +namespace operators { + +// identity is a alias of scale op. This is also a example for creating a alias +// operator. 
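+// It simply forwards the input through a scale op whose scale attribute is fixed to 1.0.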
+template +class IdentityOpMaker : public framework::OpProtoAndCheckerMaker { + public: + IdentityOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "input tensor of identity op"); + AddOutput("Out", "output tensor of identity op"); + AddComment("identity operator. Just a alias of scale op which scale = 1.0"); + } +}; + +template +class IdentityOp : public NetOp { + public: + IdentityOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + AppendOp(framework::OpRegistry::CreateOp( + "scale", {{"X", {Input("X")}}}, {{"Out", {Output("Out")}}}, + {{"scale", static_cast(1)}})); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_WITHOUT_GRADIENT(identity, ops::IdentityOp, + ops::IdentityOpMaker); diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc index 4e039688d4..57696d2ee6 100644 --- a/paddle/operators/scale_op.cc +++ b/paddle/operators/scale_op.cc @@ -48,7 +48,7 @@ The equation is: Out = scale*X } }; -// Identity Op's gradient is identity op, too. +// Scale Op's gradient is scale op, too. // Grad(Out=scale(X)) => Grad(X) = scale(Grad(Out)) template class ScaleGradOp : public NetOp { @@ -65,33 +65,6 @@ class ScaleGradOp : public NetOp { } }; -// identity is a alias of scale op. This is also a example for creating a alias -// operator. -template -class IdentityOpMaker : public framework::OpProtoAndCheckerMaker { - public: - IdentityOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "input tensor of identity op"); - AddOutput("Out", "output tensor of identity op"); - AddComment("identity operator. 
Just a alias of scale op which scale = 1.0"); - } -}; - -template -class IdentityOp : public NetOp { - public: - IdentityOp(const std::string &type, const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : NetOp(type, inputs, outputs, attrs) { - AppendOp(framework::OpRegistry::CreateOp( - "scale", {{"X", {Input("X")}}}, {{"Out", {Output("Out")}}}, - {{"scale", static_cast(1)}})); - } -}; - } // namespace operators } // namespace paddle @@ -101,5 +74,3 @@ REGISTER_OP(scale, ops::ScaleOp, ops::ScaleOpMaker, ops::ScaleGradOp); REGISTER_OP_CPU_KERNEL(scale, ops::ScaleKernel); -REGISTER_OP_WITHOUT_GRADIENT(identity, ops::IdentityOp, - ops::IdentityOpMaker); From 740c8ba12a29418eefeb22a843bebb1781f300fe Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 4 Sep 2017 16:40:23 +0800 Subject: [PATCH 08/26] remove scatter_op.cu/gather_op.cu as they support only_cpu now --- paddle/operators/CMakeLists.txt | 24 ++++++++++++++++-------- paddle/operators/gather_op.cu | 20 -------------------- paddle/operators/scatter_op.cu | 20 -------------------- 3 files changed, 16 insertions(+), 48 deletions(-) delete mode 100644 paddle/operators/gather_op.cu delete mode 100644 paddle/operators/scatter_op.cu diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index d85c3d575a..90185101c4 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -46,15 +46,20 @@ endfunction() add_subdirectory(math) -list(REMOVE_ITEM GENERAL_OPS - net_op - identity_op - minus_op - mul_op - recurrent_op - scale_op) +set(ONLYCPU_OPS + net_op + gather_op + scatter_op) +foreach(src ${ONLYCPU_OPS}) + op_library(${src} SRCS ${src}.cc) +endforeach() -op_library(net_op SRCS net_op.cc) +set(DEPS_OPS + identity_op + minus_op + mul_op + recurrent_op + scale_op) op_library(identity_op SRCS identity_op.cc DEPS scale_op) op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) @@ -62,6 +67,9 @@ op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor operator net_op) op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) +list(REMOVE_ITEM GENERAL_OPS + ${ONLYCPU_OPS} + ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) op_library(${src} SRCS ${src}.cc ${src}.cu) endforeach() diff --git a/paddle/operators/gather_op.cu b/paddle/operators/gather_op.cu deleted file mode 100644 index 3f04a7b3f8..0000000000 --- a/paddle/operators/gather_op.cu +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#define EIGEN_USE_GPU -#include "paddle/operators/gather_op.h" - -namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(gather, - ops::GatherOpKernel); diff --git a/paddle/operators/scatter_op.cu b/paddle/operators/scatter_op.cu deleted file mode 100644 index 6716b47883..0000000000 --- a/paddle/operators/scatter_op.cu +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#define EIGEN_USE_GPU -#include "paddle/operators/scatter_op.h" - -namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(scatter, - ops::ScatterOpKernel); From beafabc73e929e3790ba93687917a002ae0f3da0 Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 5 Sep 2017 10:20:43 +0800 Subject: [PATCH 09/26] follow comments --- doc/howto/dev/use_eigen_cn.md | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/doc/howto/dev/use_eigen_cn.md b/doc/howto/dev/use_eigen_cn.md index 49a726959a..1367323b71 100644 --- a/doc/howto/dev/use_eigen_cn.md +++ b/doc/howto/dev/use_eigen_cn.md @@ -7,14 +7,14 @@ Eigen Tensor模块对element-wise计算提供了强大的支持,并且书写一份代码,可以同时在CPU、GPU执行。但Eigen Tensor是一个正在开发中的模块,因此可能测试不够完备,文档较少。 -关于Eigen Tensor模块的详细介绍请参考[文档](https://github.com/RLovelett/eigen/blob/master/unsupported/Eigen/CXX11/src/Tensor/README.md) +关于Eigen Tensor模块的详细介绍请参考[文档1](https://github.com/RLovelett/eigen/blob/master/unsupported/Eigen/CXX11/src/Tensor/README.md) 和[文档2](https://bitbucket.org/eigen/eigen/src/default/unsupported/Eigen/CXX11/src/Tensor/README.md) ### paddle::framework::Tensor Paddle Tensor定义在framework目录下,其主要接口如下: -``` +```cpp class Tensor { public: /*! Return a pointer to mutable memory block. 
*/ @@ -54,9 +54,9 @@ class Tensor { }; ``` -`Placeholder`的作用的延迟分配内存,即我们可以先定义一个Tensor,然后使用Resize接口设置Tensor的大小,最后再调用mutable_data接口分配实际的内存。 +`Placeholder`的作用是延迟分配内存,即我们可以先定义一个Tensor,然后使用Resize接口设置Tensor的大小,最后再调用mutable_data接口分配实际的内存。 -``` +```cpp paddle::framework::Tensor t; paddle::platform::CPUPlace place; // set size first @@ -65,13 +65,14 @@ t.Resize({2, 3}); t.mutable_data(place); ``` +### paddle::framework::Tensor使用样例 下面以AddOp为例说明Tensor的使用过程: - InferShape 在运行神经网络计算图时,我们先调用每个`Operator`的`InferShape`接口,根据输入Tensor的大小来设置输出Tensor的大小,`Resize`接口会被调用。 -``` +```cpp void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), ctx.Input("Y")->dims(), @@ -85,7 +86,7 @@ void InferShape(const framework::InferShapeContext &ctx) const override { `Operator`的`Run`接口最终会调用对应`OpKernel`的`Compute`接口,在这时真正的分配内存,`mutable_data`接口会被调用。 -``` +```cpp void Compute(const framework::ExecutionContext& context) const override { auto* input0 = context.Input("X"); auto* input1 = context.Input("Y"); @@ -93,13 +94,13 @@ void Compute(const framework::ExecutionContext& context) const override { output->mutable_data(context.GetPlace()); - auto X = EigenVector::Flatten(*input0); - auto Y = EigenVector::Flatten(*input1); - auto Z = EigenVector::Flatten(*output); + auto x = EigenVector::Flatten(*input0); + auto y = EigenVector::Flatten(*input1); + auto z = EigenVector::Flatten(*output); auto place = context.GetEigenDevice(); - Z.device(place) = X + Y; + z.device(place) = x + y; } ``` @@ -110,7 +111,7 @@ void Compute(const framework::ExecutionContext& context) const override { 以EigenTensor为例,做一个介绍 -``` +```cpp Tensor t; float* p = t.mutable_data(make_ddim({1, 2, 3}), platform::CPUPlace()); for (int i = 0; i < 1 * 2 * 3; i++) { @@ -122,7 +123,7 @@ EigenTensor::Type et = EigenTensor::From(t); From是EigenTensor模板提供的一个接口,可以实现从paddle::framework::Tensor到对EigenTensor的转换。由于Tensor的rank是模板参数,因此在转换时需要显示的指定。 -需要额外注意的是,EigenVector::From方法是把paddle中的一维Tensor转为Eigen的一维Tensor,在这里用EigenVector来表示;而EigenVector::Flatten方法是把paddle中的一个Tensor进行reshape操作,压扁成为Eigen的一维Tensor,类型仍然为EigenVector。 +在Eigen中,不同rank的Tensor是不同类型,Vector是rank为1的Tensor。需要额外注意的是,EigenVector::From方法是把paddle中的一维Tensor转为Eigen的一维Tensor,在这里用EigenVector来表示;而EigenVector::Flatten方法是把paddle中的一个Tensor进行reshape操作,压扁成为Eigen的一维Tensor,类型仍然为EigenVector。 更多的转换方法请参考eigen_test.cc中的[单元测试](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/eigen_test.cc)。 @@ -132,12 +133,12 @@ From是EigenTensor模板提供的一个接口,可以实现从paddle::framework 当需要完成计算时,我们需要等式左边的EigenTensor调用device接口。在这里需要注意的是,这里的EigenTensor之间的运算只是改变了原有Tensor中的数据,而不会改变原有Tensor的shape信息。 -``` -auto X = EigenVector::Flatten(*input0); -auto Y = EigenVector::Flatten(*input1); -auto Z = EigenVector::Flatten(*output); +```cpp +auto x = EigenVector::Flatten(*input0); +auto y = EigenVector::Flatten(*input1); +auto z = EigenVector::Flatten(*output); auto place = context.GetEigenDevice(); -Z.device(place) = X + Y; +z.device(place) = x + y; ``` 在这段代码中,input0/input1/output可以是任意维度的Tensor。我们调用了EigenVector的Flatten接口,把任意维度的Tensor转为了一维的EigenVector。而在计算结束之后,input0/input1/output的原有shape信息不变。如果想改变原有Tensor的shape信息,可以调用Resize接口进行改变。 From efd40b66cc4bb898fe152337aeda9f65ced33767 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 5 Sep 2017 14:17:13 +0800 Subject: [PATCH 10/26] update the doc for how to write the operators. 
--- doc/howto/dev/new_op_cn.md | 141 +++++++++++++++++-------------------- 1 file changed, 64 insertions(+), 77 deletions(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index 3e71a0a592..6bd54137ba 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -23,19 +23,20 @@ - `framework::OperatorWithKernel`:继承自OperatorBase,Op有计算函数,称作有Kernel。 - `class OpProtoAndCheckerMaker`:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成 -依据是否包含kernel,将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结Op需要包含的内容如下: - - - 内容 | 定义位置 --------------- | :---------------------- -OpProtoMake定义 | `.cc`文件,Backward Op不需要定义OpProtoMake -Op定义 | `.cc`文件 -Kernel实现 | CPU、GPU共享Kernel在`.h`文件,否则,CPU可以在`.cc`文件,GPU可在`.cu`文件。 -注册Op | Op注册在`.cc`文件;Kernel注册CPU在`.cc`文件,GPU在`.cu`文件 - - -下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。 +依据是否包含kernel,可以将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结Op需要包含的内容如下: + + + 内容 | 定义位置 +-------------- | :---------------------- +OpProtoMake定义 | `*_op.cc`文件,Backward Op不需要定义OpProtoMake +Op定义 | `*_op.cc`文件 +Kernel实现 | CPU、GPU共享Kernel在`*_op.h`文件,否则,CPU可以在`*_op.cc`文件,GPU可在`*_op.cu`文件。 +注册Op | Op注册在`*_op.cc`文件;Kernel注册CPU在`*_op.cc`文件,GPU在`*_op.cu`文件 +实现新的op都添加至目录[paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)下,文件命名以`*_op.h`(如有) 、 `*_op.cc` 、`*_op.cu`(如有)结尾。 + + +下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。 ## 实现C++类 @@ -43,8 +44,8 @@ Kernel实现 | CPU、GPU共享Kernel在`.h`文件,否则,CPU可以在` ### 1. 定义ProtoMaker类 矩阵乘的公式:$Out = X * Y$, 可见该计算由两个输入,一个输出组成。首先定义`ProtoMaker`来描述该Op的输入、输出及注释: - -``` + +```cpp class MulOpMaker : public framework::OpProtoAndCheckerMaker { public: MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) @@ -59,20 +60,20 @@ The equation is: Out = X * Y } }; ``` - -[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数包括2个: + +[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数包括2个参数: - `framework::OpProto` : 前者存储Op的输入输出和参数属性,将用于Python API接口的生成。 - `framework::OpAttrChecker` :后者用于检查参数属性的合法性。 - + 构造函数里通过`AddInput`添加输入参数,通过`AddOutput`添加输出参数,通过`AddComment`添加该Op的注释,这些函数会将对应内容添加到`OpProto`中。 -在`MulOp`中添加两个输入`X`和`Y`,添加了一个输出`Out`,并解释了各自含义,该命名尽可能的规范。 +在`MulOp`中添加两个输入`X`和`Y`,添加了一个输出`Out`,并解释了各自含义,命名请遵守命名规范。 + - 再举个[`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)的例子: - -``` + +```cpp template class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { public: @@ -87,17 +88,17 @@ The equation is: Out = scale*X } }; ``` - - 在这个例子里,两处不同: - + + 这个例子有两处不同: + - `AddInput("X","...").NotInGradient()` : 表示`X`这个输入不参与`ScaleOp`对应的梯度Op计算之中。 - `AddAttr("scale", "...").SetDefault(1.0);` : 增加`scale`系数,作为参数属性,并且设置默认值为1.0。 - + ### 2. 
定义Operator类 -```c++ +```cpp class MulOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -121,20 +122,20 @@ class MulOp : public framework::OperatorWithKernel { ``` [`MulOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L22)继承自`OperatorWithKernel`。`public`成员: - -```c++ + +```cpp using framework::OperatorWithKernel::OperatorWithKernel; ``` 这句表示使用基类`OperatorWithKernel`的构造函数,也可写成: - -```c++ + +```cpp MulOp(const std::string &type, const framework::VariableNameMap &inputs, const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorWithKernel(type, inputs, outputs, attrs) {} -``` - +``` + 还需要重写`InferShape`接口。`InferShape`为const函数,不能修改Op的成员变量,参数为`const framework::InferShapeContext &ctx`,通过该参数可获取到输入输出以及属性。它的功能是: - 1). 做检查, 尽早报错:检查输入数据维度、类型等是否合法。 @@ -144,7 +145,7 @@ MulOp(const std::string &type, const framework::VariableNameMap &inputs, ### 3. 定义OpKernel类 -```C++ +```cpp template class MulKernel : public framework::OpKernel { public: @@ -163,34 +164,34 @@ class MulKernel : public framework::OpKernel { `MulKernel`继承自`framework::OpKernel`,带有模板参数: - `typename Place`: 表示设备类型,不同设备(CPU、GPU)共享同一个Kernel时,需加该模板参数,不共享则不加,一个不共享的例子是[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 - + - `typename T` : 表示数据类型,如`float`, `double`等。 - + `MulKernel`需要重写`Compute`接口,该接口参数为`const framework::ExecutionContext& context`, `ExecutionContext`相比`InferShapeContext`增加了设备类型,同样可获取到输入输出和属性参数,`Compute`函数里写具体实现时。 - -注意,不同设备(CPU、GPU)共享一个Op定义,是否则共享同一个`OpKernel`,取决于`Compute`调用的函数是否支持不同设备。`MulOp`的CPU、GPU实现共享同一个`Kernel`,`OpKernel`不共享的例子可以参考[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 - + +注意,不同设备(CPU、GPU)共享一个Op定义,是否则共享同一个`OpKernel`,取决于`Compute`调用的函数是否支持不同设备。`MulOp`的CPU、GPU实现共享同一个`Kernel`,`OpKernel`不共享的例子可以参考[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 + 到此前向Op实现完成,需要在`.cc`文件中注册该op和kernel。反向Op类的定义和Kernel定义与前向Op类似,这里不再重复。但注意,反向Op没有`ProtoMaker`。 - + ### 4. 注册Operator 在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。 -```c++ +```cpp namespace ops = paddle::operators; REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad); REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); REGISTER_OP_CPU_KERNEL(mul_grad, ops::MulGradKernel); ``` - + - `REGISTER_OP` : 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`,注册`ops::MulOpGrad`,类型名为`mul_grad`, - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。 - `REGISTER_OP_CPU_KERNEL` :注册`ops::MulKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::MulKernel`类。 在 `.cu`文件中注册GPU Kernel。 - -```c++ + +```cpp namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); REGISTER_OP_GPU_KERNEL(mul_grad, @@ -199,56 +200,42 @@ REGISTER_OP_GPU_KERNEL(mul_grad, ### 5. 
编译 -在[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)文件中添加编译。 - -``` -op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) -``` - -下面命令可以编译: - +无需修改[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会被自动加入编译。 + +直接执行下面命令可进行编译: + ``` make mul_op ``` ## 绑定Python -- 绑定Python - - 在 [`paddle/pybind/pybind.cc +- 绑定Python + + 在 [`paddle/pybind/pybind.cc `](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc)文件中添加该类: ``` USE_OP(mul); ``` 如果只实现了CPU版本,则使用`USE_CPU_ONLY_OP`: - + ``` USE_CPU_ONLY_OP(gather); ``` - + 如果OP不带Kernel,则使用`USE_NO_KENREL_OP`: - + ``` USE_NO_KENREL_OP(recurrent); ``` - + 使用`USE_OP`告知编译器需要链接该Op的目标文件,具体解释参考[代码注释](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h#L81)。 - - + + - 生成库 - 在 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件添加类到`DEPS`中,使得该Op可以链接到生成的lib库中。 - - ``` - if(WITH_PYTHON) - cc_library(paddle_pybind SHARED - SRCS pybind.cc - DEPS pybind python backward - mul_op - minus_op) - endif(WITH_PYTHON) - ``` + 无需修改 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会被自动被添加链接至生成的lib库中。 ## 实现单元测试 @@ -258,7 +245,7 @@ make mul_op 前向Op单测继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`,具体单测流程在`OpTestMeta`里完成。需在`setUp`函数定义输入输出和属性参数,以及Python对比的输出值。 -``` +```python import unittest import numpy as np from gradient_checker import GradientChecker, create_op @@ -276,17 +263,17 @@ class TestMulOp(unittest.TestCase): self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} ``` 首先需要`import`必要的包,下面详细解释其他值: - + - `self.type = "mul" ` : 定义类型,和注册的类型一致。 - `self.inputs` : 定义输入,类型为Numpy.array,并初始化。 - `self.outputs` : 定义输出,并得到Python结算结果。 - + ### 反向Operator单测 反向Op单测继承自`GradientChecker`,而`GradientChecker`集成自`unittest.TestCase`,所以反向单测函数需要`test_`开头。 - ``` + ```python class MulGradOpTest(GradientChecker): def test_mul(self): op = create_op("mul") @@ -294,7 +281,7 @@ class TestMulOp(unittest.TestCase): 'X': np.random.random((32, 84)).astype("float32"), 'Y': np.random.random((84, 100)).astype("float32") } - self.compare_grad(op, inputs) + self.compare_grad(op, inputs) # mul op will enlarge the relative error self.check_grad( op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.5) @@ -310,7 +297,7 @@ class TestMulOp(unittest.TestCase): - 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out` -### 编译和执行 +### 编译和执行 单测完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)里添加编译: From b3ff125d5598da6e362f360220eef816cf0d7f42 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 5 Sep 2017 16:29:57 +0800 Subject: [PATCH 11/26] follow comments. 
--- doc/howto/dev/new_op_cn.md | 42 +++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index 6bd54137ba..85c222163c 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -28,16 +28,18 @@ 内容 | 定义位置 -------------- | :---------------------- -OpProtoMake定义 | `*_op.cc`文件,Backward Op不需要定义OpProtoMake -Op定义 | `*_op.cc`文件 -Kernel实现 | CPU、GPU共享Kernel在`*_op.h`文件,否则,CPU可以在`*_op.cc`文件,GPU可在`*_op.cu`文件。 -注册Op | Op注册在`*_op.cc`文件;Kernel注册CPU在`*_op.cc`文件,GPU在`*_op.cu`文件 +OpProtoMake定义 | `.cc`文件,Backward Op不需要定义OpProtoMake +Op定义 | `.cc`文件 +Kernel实现 | CPU、GPU共享Kernel在`.h`文件,否则,CPU可以在`.cc`文件,GPU可在`.cu`文件。 +注册Op | Op注册在`.cc`文件;Kernel注册CPU在`.cc`文件,GPU在`.cu`文件 + 实现新的op都添加至目录[paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)下,文件命名以`*_op.h`(如有) 、 `*_op.cc` 、`*_op.cu`(如有)结尾。 下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。 + ## 实现C++类 @@ -200,13 +202,18 @@ REGISTER_OP_GPU_KERNEL(mul_grad, ### 5. 编译 -无需修改[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会被自动加入编译。 +- 简单**无特殊依赖**的OP无需修改CMakeList.txt文件。[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt) 会自动将 `paddle/operators` 目录下新增的 `*_op.cc` 文件加入编译。 +- 较为复杂、**有额外依赖** 的operator仍需要修改[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)。如,`mul_op` 依赖 `math_function`,需要在`CMakeLists.txt`中添加如下内容: -直接执行下面命令可进行编译: + ``` + op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) + + ``` -``` -make mul_op -``` +- 运行下面命令可以进行编译: + + ``` + make mul_op + ``` ## 绑定Python @@ -235,13 +242,13 @@ make mul_op - 生成库 - 无需修改 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会被自动被添加链接至生成的lib库中。 + 无需修改 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会自动被添加链接到生成的lib库中。 ## 实现单元测试 单测包括对比前向Op不同设备(CPU、GPU)的实现、对比反向OP不同设备(CPU、GPU)的实现、反向Op的梯度测试。下面介绍介绍[`MulOp`的单测](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py)。 -### 前向Operator单测 +### 前向Operator单元测试 前向Op单测继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`,具体单测流程在`OpTestMeta`里完成。需在`setUp`函数定义输入输出和属性参数,以及Python对比的输出值。 @@ -269,7 +276,7 @@ class TestMulOp(unittest.TestCase): - `self.outputs` : 定义输出,并得到Python结算结果。 -### 反向Operator单测 +### 反向Operator单元测试 反向Op单测继承自`GradientChecker`,而`GradientChecker`集成自`unittest.TestCase`,所以反向单测函数需要`test_`开头。 @@ -297,21 +304,22 @@ class TestMulOp(unittest.TestCase): - 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out` -### 编译和执行 +### 编译和执行单元测试 -单测完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)里添加编译: +单测完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)里添加以下内容将单测加入工程中: ``` py_test(test_mul_op SRCS test_mul_op.py) ``` -编译时需要打开`WITH_TESTING`, 即 `cmake paddle_dir -DWITH_TESTING=ON`,编译成功之后执行单测命令为: +请注意,**不同于Op的编译测试,运行单元测试测时需要编译整个工程**,并且编译时需要打开`WITH_TESTING`, 即`cmake paddle_dir 
-DWITH_TESTING=ON`。编译成功后,执行下面的命令来运行单测: -``` +```bash make test ARGS="-R test_mul_op -V" ``` + 或者: -``` +```bash ctest -R test_mul_op ``` From 020e45f715b1a1cea5dcbee10dacb055b3889523 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 5 Sep 2017 16:54:12 +0800 Subject: [PATCH 12/26] follow comments to revert add_two_op to add_op --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/grad_op_builder_test.cc | 4 ++-- paddle/operators/{add_two_op.cc => add_op.cc} | 7 +++---- paddle/operators/{add_two_op.cu => add_op.cu} | 5 ++--- paddle/operators/{add_two_op.h => add_op.h} | 0 paddle/pybind/pybind.cc | 2 +- python/paddle/v2/framework/tests/test_add_two_op.py | 2 +- python/paddle/v2/framework/tests/test_gradient_checker.py | 2 +- python/paddle/v2/framework/tests/test_net.py | 4 ++-- python/paddle/v2/framework/tests/test_operator.py | 4 ++-- python/paddle/v2/framework/tests/test_recurrent_op.py | 2 +- 11 files changed, 16 insertions(+), 18 deletions(-) rename paddle/operators/{add_two_op.cc => add_op.cc} (88%) rename paddle/operators/{add_two_op.cu => add_op.cu} (82%) rename paddle/operators/{add_two_op.h => add_op.h} (100%) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index e138517b6b..c0838d9b75 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -25,7 +25,7 @@ cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator) cc_library(op_registry SRCS op_registry.cc DEPS grad_op_builder) cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) -cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_two_op) +cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_op) py_proto_compile(framework_py_proto SRCS framework.proto) # Generate an empty __init__.py to make framework_py_proto as a valid python module. diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index 902c2655e9..9e3ca563c6 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -3,7 +3,7 @@ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" -USE_OP(add_two); +USE_OP(add); namespace paddle { namespace framework { @@ -41,7 +41,7 @@ namespace f = paddle::framework; TEST(GradOpBuilder, AddTwo) { std::shared_ptr add_op(f::OpRegistry::CreateOp( - "add_two", {{"X", {"x"}}, {"Y", {"y"}}}, {{"Out", {"out"}}}, {})); + "add", {{"X", {"x"}}, {"Y", {"y"}}}, {{"Out", {"out"}}}, {})); std::shared_ptr grad_add_op = f::OpRegistry::CreateGradOp(*add_op); EXPECT_EQ(grad_add_op->Inputs().size(), 4UL); diff --git a/paddle/operators/add_two_op.cc b/paddle/operators/add_op.cc similarity index 88% rename from paddle/operators/add_two_op.cc rename to paddle/operators/add_op.cc index bc99e306e0..8dbd47cf0d 100644 --- a/paddle/operators/add_two_op.cc +++ b/paddle/operators/add_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/operators/add_two_op.h" +#include "paddle/operators/add_op.h" namespace paddle { namespace operators { @@ -57,7 +57,6 @@ class AddOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(add_two, ops::AddOp, ops::AddOpMaker, add_two_grad, ops::AddOpGrad); +REGISTER_OP(add, ops::AddOp, ops::AddOpMaker, add_grad, ops::AddOpGrad); -REGISTER_OP_CPU_KERNEL(add_two, - ops::AddKernel); +REGISTER_OP_CPU_KERNEL(add, ops::AddKernel); diff --git a/paddle/operators/add_two_op.cu b/paddle/operators/add_op.cu similarity index 82% rename from paddle/operators/add_two_op.cu rename to paddle/operators/add_op.cu index acc03b2c8b..fdf2c66c7c 100644 --- a/paddle/operators/add_two_op.cu +++ b/paddle/operators/add_op.cu @@ -14,8 +14,7 @@ #define EIGEN_USE_GPU #include "paddle/framework/op_registry.h" -#include "paddle/operators/add_two_op.h" +#include "paddle/operators/add_op.h" namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(add_two, - ops::AddKernel); +REGISTER_OP_GPU_KERNEL(add, ops::AddKernel); diff --git a/paddle/operators/add_two_op.h b/paddle/operators/add_op.h similarity index 100% rename from paddle/operators/add_two_op.h rename to paddle/operators/add_op.h diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 6896422617..dba3d8f285 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -30,7 +30,7 @@ limitations under the License. */ namespace py = pybind11; -USE_OP(add_two); +USE_OP(add); USE_OP(onehot_cross_entropy); USE_OP(sgd); USE_OP(mul); diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py b/python/paddle/v2/framework/tests/test_add_two_op.py index 0def484edd..a578e74eca 100644 --- a/python/paddle/v2/framework/tests/test_add_two_op.py +++ b/python/paddle/v2/framework/tests/test_add_two_op.py @@ -11,7 +11,7 @@ class TestAddOp(unittest.TestCase): __metaclass__ = OpTestMeta def setUp(self): - self.type = "add_two" + self.type = "add" self.inputs = { 'X': numpy.random.random((102, 105)).astype("float32"), 'Y': numpy.random.random((102, 105)).astype("float32") diff --git a/python/paddle/v2/framework/tests/test_gradient_checker.py b/python/paddle/v2/framework/tests/test_gradient_checker.py index e0b3151208..857427cdfb 100644 --- a/python/paddle/v2/framework/tests/test_gradient_checker.py +++ b/python/paddle/v2/framework/tests/test_gradient_checker.py @@ -7,7 +7,7 @@ from gradient_checker import get_numeric_gradient class GetNumericGradientTest(unittest.TestCase): def test_add_op(self): - add_op = Operator('add_two', X="X", Y="Y", Out="Z") + add_op = Operator('add', X="X", Y="Y", Out="Z") x = numpy.random.random((10, 1)).astype("float32") y = numpy.random.random((10, 1)).astype("float32") diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py index 9339cf28da..e4b7cd480c 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/framework/tests/test_net.py @@ -15,7 +15,7 @@ def fc(X, W, Y): class TestNet(unittest.TestCase): def test_net_all(self): net = core.Net.create() - op1 = Operator("add_two", X="X", Y="Y", Out="Out") + op1 = Operator("add", X="X", Y="Y", Out="Out") net.append_op(op1) net2 = core.Net.create() @@ -26,7 +26,7 @@ class TestNet(unittest.TestCase): expected = ''' Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}. - Op(add_two), inputs:{X[X], Y[Y]}, outputs:{Out[Out]}. + Op(add), inputs:{X[X], Y[Y]}, outputs:{Out[Out]}. 
Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}. diff --git a/python/paddle/v2/framework/tests/test_operator.py b/python/paddle/v2/framework/tests/test_operator.py index 1abc4eeb57..040556322d 100644 --- a/python/paddle/v2/framework/tests/test_operator.py +++ b/python/paddle/v2/framework/tests/test_operator.py @@ -193,10 +193,10 @@ class TestOpDescCreationMethod(unittest.TestCase): class TestOpCreations(unittest.TestCase): def test_all(self): - add_op = op.Operator("add_two", X="a", Y="b", Out="z") + add_op = op.Operator("add", X="a", Y="b", Out="z") self.assertIsNotNone(add_op) # Invoke C++ DebugString() - self.assertEqual('Op(add_two), inputs:{X[a], Y[b]}, outputs:{Out[z]}.', + self.assertEqual('Op(add), inputs:{X[a], Y[b]}, outputs:{Out[z]}.', str(add_op)) diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py index d6000ab9f9..22e680fd78 100644 --- a/python/paddle/v2/framework/tests/test_recurrent_op.py +++ b/python/paddle/v2/framework/tests/test_recurrent_op.py @@ -146,7 +146,7 @@ class TestRecurrentOp(unittest.TestCase): stepnet = core.Net.create() x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx") h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") - sum_op = Operator("add_two", X="Wx", Y="Uh", Out="sum") + sum_op = Operator("add", X="Wx", Y="Uh", Out="sum") sig_op = Operator("sigmoid", X="sum", Y="h@alias") for op in [x_fc_op, h_fc_op, sum_op, sig_op]: From e687f3f540d3a403ab376f6c533362a6e6c577ff Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Tue, 5 Sep 2017 17:37:12 +0800 Subject: [PATCH 13/26] Make attribute support for std::vector> --- paddle/framework/attribute.cc | 12 ++++++++++++ paddle/framework/attribute.h | 3 ++- paddle/framework/framework.proto | 7 +++++++ python/paddle/v2/framework/op.py | 7 ++++++- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/paddle/framework/attribute.cc b/paddle/framework/attribute.cc index 9eb07acdff..27132eaa0b 100644 --- a/paddle/framework/attribute.cc +++ b/paddle/framework/attribute.cc @@ -43,6 +43,10 @@ template <> AttrType AttrTypeID>() { return STRINGS; } +template <> +AttrType AttrTypeID>>() { + return INT_PAIRS; +} Attribute GetAttrValue(const OpDesc::Attr& attr_desc) { switch (attr_desc.type()) { @@ -76,6 +80,14 @@ Attribute GetAttrValue(const OpDesc::Attr& attr_desc) { } return val; } + case paddle::framework::AttrType::INT_PAIRS: { + std::vector> val(attr_desc.int_pairs_size()); + for (int i = 0; i < attr_desc.int_pairs_size(); ++i) { + val[i].first = attr_desc.int_pairs(i).first(); + val[i].second = attr_desc.int_pairs(i).second(); + } + return val; + } } PADDLE_ENFORCE(false, "Unknown OpDesc::AttrDesc::type !"); return boost::blank(); diff --git a/paddle/framework/attribute.h b/paddle/framework/attribute.h index 08b47cabd4..071879a9d4 100644 --- a/paddle/framework/attribute.h +++ b/paddle/framework/attribute.h @@ -28,7 +28,8 @@ namespace paddle { namespace framework { typedef boost::variant, - std::vector, std::vector> + std::vector, std::vector, + std::vector>> Attribute; typedef std::unordered_map AttributeMap; diff --git a/paddle/framework/framework.proto b/paddle/framework/framework.proto index ae44a1ffd4..368136a972 100644 --- a/paddle/framework/framework.proto +++ b/paddle/framework/framework.proto @@ -22,8 +22,14 @@ enum AttrType { INTS = 3; FLOATS = 4; STRINGS = 5; + 
INT_PAIRS = 6; } +message IntPair { + required int32 first = 1; + required int32 second = 2; +}; + // OpDesc describes an instance of a C++ framework::OperatorBase // derived class type. message OpDesc { @@ -37,6 +43,7 @@ message OpDesc { repeated int32 ints = 6; repeated float floats = 7; repeated string strings = 8; + repeated IntPair int_pairs = 9; }; message Var { diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py index e7e932f6fe..0349407a85 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/framework/op.py @@ -94,9 +94,14 @@ class OpDescCreationMethod(object): new_attr.floats.extend(user_defined_attr) elif attr.type == framework_pb2.STRINGS: new_attr.strings.extend(user_defined_attr) + elif attr.type == framework_pb2.INT_PAIRS: + for p in user_defined_attr: + pair = new_attr.pairs.add() + pair.first = p[0] + pair.second = p[1] else: raise NotImplementedError("Not support attribute type " + - attr.type) + str(attr.type)) return op_desc From 2beaf3aa2a966980177ea0a0d309fd7a7a594a1c Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 5 Sep 2017 18:12:42 +0800 Subject: [PATCH 14/26] auto find .cc or .cu in operator/CMakeLists.txt --- paddle/operators/CMakeLists.txt | 54 +++++++++++++++------------------ 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 90185101c4..99b8e1967f 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -14,27 +14,31 @@ function(op_library TARGET) cmake_parse_arguments(op_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - foreach(src ${op_library_SRCS}) - if (${src} MATCHES ".*\\.cu$") - list(APPEND cu_srcs ${src}) - elseif(${src} MATCHES ".*\\.cc$") - list(APPEND cc_srcs ${src}) - else() - message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu") + list(LENGTH op_library_SRCS op_library_SRCS_len) + if (${op_library_SRCS_len} EQUAL 0) + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) + list(APPEND cc_srcs ${TARGET}.cc) endif() - endforeach() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) + list(APPEND cu_srcs ${TARGET}.cc) + endif() + else() + foreach(src ${op_library_SRCS}) + if (${src} MATCHES ".*\\.cu$") + list(APPEND cu_srcs ${src}) + elseif(${src} MATCHES ".*\\.cc$") + list(APPEND cc_srcs ${src}) + else() + message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu") + endif() + endforeach() + endif() list(LENGTH cc_srcs cc_srcs_len) if (${cc_srcs_len} EQUAL 0) message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file") endif() - list(LENGTH cu_srcs cu_srcs_len) - list(LENGTH op_library_DEPS dep_len) - if (${cu_srcs_len} EQUAL 0 AND ${dep_len} EQUAL 0) - message(WARNING "The op library ${TARGET} not support GPU!") - endif() - if (WITH_GPU) nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) @@ -46,32 +50,22 @@ endfunction() add_subdirectory(math) -set(ONLYCPU_OPS - net_op - gather_op - scatter_op) -foreach(src ${ONLYCPU_OPS}) - op_library(${src} SRCS ${src}.cc) -endforeach() - set(DEPS_OPS identity_op minus_op mul_op recurrent_op scale_op) -op_library(identity_op SRCS identity_op.cc DEPS scale_op) -op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) -op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) +op_library(identity_op DEPS scale_op) +op_library(minus_op DEPS scale_op) +op_library(mul_op DEPS math_function) op_library(recurrent_op SRCS 
recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor operator net_op) -op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) +op_library(scale_op DEPS net_op) -list(REMOVE_ITEM GENERAL_OPS - ${ONLYCPU_OPS} - ${DEPS_OPS}) +list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) - op_library(${src} SRCS ${src}.cc ${src}.cu) + op_library(${src}) endforeach() set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") From 42513496f30c137e42b54fab6ce275845349bccf Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 5 Sep 2017 22:17:24 +0800 Subject: [PATCH 15/26] fix a type error in operator/CMakeLists.txt --- paddle/operators/CMakeLists.txt | 2 +- paddle/operators/add_op.cu | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 99b8e1967f..8a0ff1eb53 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -20,7 +20,7 @@ function(op_library TARGET) list(APPEND cc_srcs ${TARGET}.cc) endif() if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) - list(APPEND cu_srcs ${TARGET}.cc) + list(APPEND cu_srcs ${TARGET}.cu) endif() else() foreach(src ${op_library_SRCS}) diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index fdf2c66c7c..d9c6d20a6c 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -12,8 +12,6 @@ See the License for the specific language governing permissions and limitations under the License. */ -#define EIGEN_USE_GPU -#include "paddle/framework/op_registry.h" #include "paddle/operators/add_op.h" namespace ops = paddle::operators; From dc520da763f258e6ca1c79f79bb82bc55ec4cf14 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 5 Sep 2017 23:08:21 +0800 Subject: [PATCH 16/26] update doc of softmax_op. --- paddle/operators/softmax_op.cc | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 40c51a64c4..7d062ad67c 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -24,7 +24,7 @@ class SoftmaxOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.Input("X")->dims().size() == 2UL, - "The input of softmax op must be matrix"); + "The input of softmax op must be a matrix."); ctx.Output("Y")->Resize(ctx.Input("X")->dims()); } }; @@ -34,9 +34,27 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { SoftmaxOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "input of softmax"); - AddOutput("Y", "output of softmax"); - AddComment("Softmax Op"); + AddInput("X", + "The input tensor of softmax. " + "2-D with shape [batch_size, input_feature_dimensions]."); + AddOutput("Y", "The normalized values with the same shape as X."); + AddComment(R"DOC( +The input of softmax operator is a 2-D tensor with shape N x K (N is the +batch_size, K is the dimension of input feature). The output tensor has the +same shape as the input tensor. + +For each row of the input tensor, the softmax operator squashes the +K-dimensional vector of arbitrary real values to a K-dimensional vector of real +values in the range [0, 1] that add up to 1. Specifically, it computes the +exponential of the given dimension and the sum of exponential values of all +the other dimensions in the K-dimensional vector input. 
Then the ratio of the +exponential of the given dimension and the sum of exponential values of all +the other dimensions is the output of the softmax operator. + +For each row `i` and each column `j` in X, we have: + Y[i, j] = exp(X[i, j]) / sum_j(exp(X[i, j])) + +)DOC"); } }; From bc0f9495f019d9f72bf9d6a82f97673c7dce0063 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 5 Sep 2017 13:43:51 -0700 Subject: [PATCH 17/26] Move two tests form `op_registry_test` to `operator_test` 1. TEST(ProtoMaker, DuplicatedAttr) 2. TEST(ProtoMaker, DuplicatedInOut) --- paddle/framework/op_registry_test.cc | 34 +--------------------------- paddle/framework/operator_test.cc | 34 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index 50c45919c5..b43f6a8cc5 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -174,36 +174,4 @@ TEST(OpRegistry, CustomChecker) { op->Run(scope, dev_ctx); int test_attr = op->GetAttr("test_attr"); ASSERT_EQ(test_attr, 4); -} - -class TestAttrProtoMaker : public pd::OpProtoAndCheckerMaker { - public: - TestAttrProtoMaker(pd::OpProto* proto, pd::OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddAttr("scale", "scale of test op"); - AddAttr("scale", "scale of test op"); - } -}; - -TEST(ProtoMaker, DuplicatedAttr) { - pd::OpProto op_proto; - pd::OpAttrChecker op_checker; - auto proto_maker = TestAttrProtoMaker(&op_proto, &op_checker); - ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet); -} - -class TestInOutProtoMaker : public pd::OpProtoAndCheckerMaker { - public: - TestInOutProtoMaker(pd::OpProto* proto, pd::OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("input", "input of test op"); - AddInput("input", "input of test op"); - } -}; - -TEST(ProtoMaker, DuplicatedInOut) { - pd::OpProto op_proto; - pd::OpAttrChecker op_checker; - auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker); - ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet); -} +} \ No newline at end of file diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index f7c9e6b196..8a1970c7a8 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -263,4 +263,38 @@ TEST(Operator, Clone) { OperatorClone a("ABC", {}, {}, {}); auto b = a.Clone(); ASSERT_EQ(a.Type(), b->Type()); +} + +class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { + public: + TestAttrProtoMaker(paddle::framework::OpProto* proto, + paddle::framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddAttr("scale", "scale of test op"); + AddAttr("scale", "scale of test op"); + } +}; + +TEST(ProtoMaker, DuplicatedAttr) { + paddle::framework::OpProto op_proto; + paddle::framework::OpAttrChecker op_checker; + auto proto_maker = TestAttrProtoMaker(&op_proto, &op_checker); + ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet); +} + +class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { + public: + TestInOutProtoMaker(paddle::framework::OpProto* proto, + paddle::framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("input", "input of test op"); + AddInput("input", "input of test op"); + } +}; + +TEST(ProtoMaker, DuplicatedInOut) { + paddle::framework::OpProto op_proto; + 
paddle::framework::OpAttrChecker op_checker; + auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker); + ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet); } \ No newline at end of file From fd0e1e893f22e1ef27fb9f1e6d12c590d2fcdeea Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 5 Sep 2017 17:42:14 -0700 Subject: [PATCH 18/26] Fix warnings in lookup_op --- paddle/operators/lookup_table_op.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h index 4da8079b91..877b36cef4 100644 --- a/paddle/operators/lookup_table_op.h +++ b/paddle/operators/lookup_table_op.h @@ -30,12 +30,12 @@ class LookupTableKernel : public framework::OpKernel { auto ids_t = context.Input("Ids"); // int tensor auto output_t = context.Output("Out"); // float tensor - size_t N = table_t->dims()[0]; - size_t D = table_t->dims()[1]; + int N = table_t->dims()[0]; + int D = table_t->dims()[1]; auto ids = ids_t->data(); auto table = table_t->data(); auto output = output_t->mutable_data(context.GetPlace()); - for (size_t i = 0; i < product(ids_t->dims()); ++i) { + for (ssize_t i = 0; i < product(ids_t->dims()); ++i) { PADDLE_ENFORCE_LT(ids[i], N); PADDLE_ENFORCE_GE(ids[i], 0); memcpy(output + i * D, table + ids[i] * D, D * sizeof(T)); @@ -51,8 +51,8 @@ class LookupTableGradKernel : public framework::OpKernel { auto d_output_t = context.Input(framework::GradVarName("Out")); auto d_table_t = context.Output(framework::GradVarName("W")); - size_t N = d_table_t->dims()[0]; - size_t D = d_table_t->dims()[1]; + int N = d_table_t->dims()[0]; + int D = d_table_t->dims()[1]; auto ids = ids_t->data(); const T* d_output = d_output_t->data(); T* d_table = d_table_t->mutable_data(context.GetPlace()); @@ -61,10 +61,10 @@ class LookupTableGradKernel : public framework::OpKernel { t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); - for (size_t i = 0; i < product(ids_t->dims()); ++i) { + for (ssize_t i = 0; i < product(ids_t->dims()); ++i) { PADDLE_ENFORCE_LT(ids[i], N); PADDLE_ENFORCE_GE(ids[i], 0); - for (size_t j = 0; j < D; ++j) { + for (int j = 0; j < D; ++j) { d_table[ids[i] * D + j] += d_output[i * D + j]; } } From 5d9478094d8721f312d9fec323920e608ed23e66 Mon Sep 17 00:00:00 2001 From: lispc Date: Wed, 6 Sep 2017 11:33:16 +0800 Subject: [PATCH 19/26] PyDataProvider2.InputType repr refine style --- python/paddle/trainer/PyDataProvider2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/trainer/PyDataProvider2.py b/python/paddle/trainer/PyDataProvider2.py index 033c71cf8f..248da4ae8d 100644 --- a/python/paddle/trainer/PyDataProvider2.py +++ b/python/paddle/trainer/PyDataProvider2.py @@ -87,7 +87,8 @@ class InputType(object): def __repr__(self): """ - Return a human readable representation like 'InputType(dim=25921, seq_type=SequenceType.NO_SEQUENCE, type=DataType.Dense)' + Return a human readable representation like 'InputType(dim=25921, + seq_type=SequenceType.NO_SEQUENCE, type=DataType.Dense)' """ repr_str = type(self).__name__ repr_str += '(' From 11163dfc7a6c1504c42acf77bd48d677d7167941 Mon Sep 17 00:00:00 2001 From: qijun Date: Wed, 6 Sep 2017 11:54:33 +0800 Subject: [PATCH 20/26] make dim int to int64_t --- paddle/framework/ddim.cc | 64 ++++++++++----------- paddle/framework/ddim.h | 20 +++---- paddle/framework/ddim_test.cc | 4 +- paddle/framework/dim.h | 67 +++++++++++----------- paddle/framework/dim_test.cu | 4 +- paddle/framework/eigen.h | 2 +- 
paddle/framework/tensor_impl.h | 4 +- paddle/operators/gaussian_random_op.cc | 10 +++- paddle/operators/rnn/recurrent_op_utils.cc | 2 +- paddle/operators/uniform_random_op.cc | 10 +++- paddle/pybind/pybind.cc | 2 +- paddle/pybind/tensor_py.h | 4 +- 12 files changed, 102 insertions(+), 91 deletions(-) diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index cfd3e8dfde..85b7de7974 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -21,16 +21,16 @@ namespace framework { /// @cond HIDDEN template -Dim make_dim(const int* d) { +Dim make_dim(const int64_t* d) { return Dim(*d, make_dim(d + 1)); } template <> -Dim<1> make_dim<1>(const int* d) { +Dim<1> make_dim<1>(const int64_t* d) { return Dim<1>(*d); } -void make_ddim(DDim& ddim, const int* dims, int n) { +void make_ddim(DDim& ddim, const int64_t* dims, int n) { switch (n) { case 1: ddim = make_dim<1>(dims); @@ -67,13 +67,13 @@ void make_ddim(DDim& ddim, const int* dims, int n) { /// @endcond -DDim make_ddim(std::initializer_list dims) { +DDim make_ddim(std::initializer_list dims) { DDim result(make_dim(0)); make_ddim(result, dims.begin(), dims.size()); return result; } -DDim make_ddim(const std::vector& dims) { +DDim make_ddim(const std::vector& dims) { DDim result(make_dim(0)); make_ddim(result, &dims[0], dims.size()); return result; @@ -81,12 +81,12 @@ DDim make_ddim(const std::vector& dims) { /// @cond HIDDEN // XXX For some reason, putting this in an anonymous namespace causes errors -class DynamicMutableIndexer : public boost::static_visitor { +class DynamicMutableIndexer : public boost::static_visitor { public: explicit DynamicMutableIndexer(int idx) : idx_(idx) {} template - int& operator()(Dim& dim) const { + int64_t& operator()(Dim& dim) const { return dim[idx_]; } @@ -94,12 +94,12 @@ class DynamicMutableIndexer : public boost::static_visitor { int idx_; }; -class DynamicConstIndexer : public boost::static_visitor { +class DynamicConstIndexer : public boost::static_visitor { public: explicit DynamicConstIndexer(int idx) : idx_(idx) {} template - int operator()(const Dim& dim) const { + int64_t operator()(const Dim& dim) const { return dim[idx_]; } @@ -109,22 +109,22 @@ class DynamicConstIndexer : public boost::static_visitor { /// @endcond -int& DDim::operator[](int idx) { +int64_t& DDim::operator[](int idx) { return boost::apply_visitor(DynamicMutableIndexer(idx), var); } -int DDim::operator[](int idx) const { +int64_t DDim::operator[](int idx) const { return boost::apply_visitor(DynamicConstIndexer(idx), var); } -ssize_t DDim::size() const { return arity(*this); } +int64_t DDim::size() const { return arity(*this); } bool DDim::operator==(DDim d) const { if (var.which() != d.getVar().which()) { return false; } else { - std::vector v1 = vectorize(*this); - std::vector v2 = vectorize(d); + std::vector v1 = vectorize(*this); + std::vector v2 = vectorize(d); for (unsigned int i = 0; i < v1.size(); i++) { if (v1[i] != v2[i]) { @@ -139,10 +139,10 @@ bool DDim::operator==(DDim d) const { bool DDim::operator!=(DDim d) const { return !(*this == d); } DDim DDim::operator+(DDim d) const { - std::vector v1 = vectorize(*this); - std::vector v2 = vectorize(d); + std::vector v1 = vectorize(*this); + std::vector v2 = vectorize(d); - std::vector v3; + std::vector v3; assert(v1.size() == v2.size()); @@ -154,10 +154,10 @@ DDim DDim::operator+(DDim d) const { } DDim DDim::operator*(DDim d) const { - std::vector v1 = vectorize(*this); - std::vector v2 = vectorize(d); + std::vector v1 = vectorize(*this); + std::vector v2 
= vectorize(d); - std::vector v3; + std::vector v3; assert(v1.size() == v2.size()); @@ -168,15 +168,15 @@ DDim DDim::operator*(DDim d) const { return make_ddim(v3); } -int get(const DDim& ddim, int idx) { return ddim[idx]; } +int64_t get(const DDim& ddim, int idx) { return ddim[idx]; } void set(DDim& ddim, int idx, int value) { ddim[idx] = value; } /// @cond HIDDEN struct VectorizeVisitor : public boost::static_visitor<> { - std::vector& vector; + std::vector& vector; - explicit VectorizeVisitor(std::vector& v) : vector(v) {} + explicit VectorizeVisitor(std::vector& v) : vector(v) {} template void operator()(const T& t) { @@ -188,31 +188,31 @@ struct VectorizeVisitor : public boost::static_visitor<> { }; /// @endcond -std::vector vectorize(const DDim& ddim) { - std::vector result; +std::vector vectorize(const DDim& ddim) { + std::vector result; VectorizeVisitor visitor(result); boost::apply_visitor(visitor, ddim); return result; } -struct ProductVisitor : public boost::static_visitor { +struct ProductVisitor : public boost::static_visitor { template - ssize_t operator()(const Dim& dim) { + int64_t operator()(const Dim& dim) { return product(dim); } }; -ssize_t product(const DDim& ddim) { +int64_t product(const DDim& ddim) { ProductVisitor visitor; return boost::apply_visitor(visitor, ddim); } struct SliceVectorizeVisitor : public boost::static_visitor<> { - std::vector& vector; + std::vector& vector; int begin; int end; - SliceVectorizeVisitor(std::vector& v, int b, int e) + SliceVectorizeVisitor(std::vector& v, int b, int e) : vector(v), begin(b), end(e) { PADDLE_ENFORCE(begin < end, "Begin index must be less than end index in ddim slice."); @@ -240,7 +240,7 @@ struct SliceVectorizeVisitor : public boost::static_visitor<> { }; DDim slice_ddim(const DDim& dim, int begin, int end) { - std::vector vec; + std::vector vec; vec.reserve(end - begin); SliceVectorizeVisitor visitor(vec, begin, end); boost::apply_visitor(visitor, dim); @@ -280,7 +280,7 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) { return os; } -DDim::DDim(std::initializer_list init_list) { +DDim::DDim(std::initializer_list init_list) { *this = make_ddim(init_list); } } // namespace framework diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index 95f294b627..db30c52394 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -40,7 +40,7 @@ struct DDim { template explicit DDim(const Dim& in) : var(in) {} - /*implicit*/ DDim(std::initializer_list init_list); + /*implicit*/ DDim(std::initializer_list init_list); template DDim& operator=(const Dim& in) { @@ -48,8 +48,8 @@ struct DDim { return *this; } - int& operator[](int idx); - int operator[](int idx) const; + int64_t& operator[](int idx); + int64_t operator[](int idx) const; template typename Visitor::result_type apply_visitor(Visitor& visitor) { @@ -71,15 +71,15 @@ struct DDim { DDim operator*(DDim d) const; - ssize_t size() const; + int64_t size() const; }; /** - * \brief Make a DDim from std::vector + * \brief Make a DDim from std::vector * * \param dims An vector of ints. Must be sized between [1, 9] */ -DDim make_ddim(const std::vector& dims); +DDim make_ddim(const std::vector& dims); /** * \brief Make a DDim from an initializer list @@ -87,14 +87,14 @@ DDim make_ddim(const std::vector& dims); * \param dims An initializer list of ints. 
Must be sized between [1, 9] * */ -DDim make_ddim(std::initializer_list dims); +DDim make_ddim(std::initializer_list dims); -int get(const DDim& dim, int idx); +int64_t get(const DDim& dim, int idx); void set(DDim& dim, int idx, int val); -std::vector vectorize(const DDim& ddim); +std::vector vectorize(const DDim& ddim); -ssize_t product(const DDim& ddim); +int64_t product(const DDim& ddim); /** * \brief Slice a ddim diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index 9d18a2972c..756232b1b5 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -12,7 +12,7 @@ TEST(DDim, Equality) { EXPECT_EQ(ddim[2], 5); // construct a DDim from a vector - std::vector vec({9, 1, 5}); + std::vector vec({9, 1, 5}); paddle::framework::DDim vddim = paddle::framework::make_ddim(vec); EXPECT_EQ(ddim[0], 9); EXPECT_EQ(ddim[1], 1); @@ -25,7 +25,7 @@ TEST(DDim, Equality) { EXPECT_EQ(paddle::framework::get(ddim, 0), 6); // vectorize a DDim - std::vector res_vec = paddle::framework::vectorize(vddim); + std::vector res_vec = paddle::framework::vectorize(vddim); EXPECT_EQ(res_vec[0], 9); EXPECT_EQ(res_vec[1], 1); EXPECT_EQ(res_vec[2], 5); diff --git a/paddle/framework/dim.h b/paddle/framework/dim.h index 883fdc55eb..04d4b0e604 100644 --- a/paddle/framework/dim.h +++ b/paddle/framework/dim.h @@ -17,13 +17,13 @@ struct Dim { static constexpr int dimensions = i; template - HOSTDEVICE Dim(int _head, Args... _tail) : head(_head), tail(_tail...) { + HOSTDEVICE Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) { static_assert(sizeof...(_tail) == i - 1, "Dim initialized with the wrong number of parameters"); } HOSTDEVICE - Dim(int _head, const Dim& _tail) : head(_head), tail(_tail) {} + Dim(int64_t _head, const Dim& _tail) : head(_head), tail(_tail) {} HOSTDEVICE Dim() : head(0), tail() {} @@ -31,12 +31,12 @@ struct Dim { /** Construct a Dim from a linear index and size. Uses Fortran order * indexing. 
*/ HOSTDEVICE - Dim(int idx, const Dim& size) + Dim(int64_t idx, const Dim& size) : head(idx % size.head), tail(idx / size.head, size.tail) {} /** Construct a Dim with each dimension set to the given index */ HOSTDEVICE - Dim(int idx) : head(idx), tail(idx) {} + Dim(int64_t idx) : head(idx), tail(idx) {} HOSTDEVICE bool operator==(const Dim& o) const { @@ -47,13 +47,13 @@ struct Dim { bool operator!=(const Dim& o) const { return !(*this == o); } HOSTDEVICE - int& operator[](int idx); + int64_t& operator[](int idx); HOSTDEVICE - int operator[](int idx) const; + int64_t operator[](int idx) const; HOST std::string to_string() const; - int head; + int64_t head; Dim tail; }; @@ -63,7 +63,7 @@ struct Dim<1> { static constexpr int dimensions = 1; HOSTDEVICE - Dim(int _head) : head(_head) {} + Dim(int64_t _head) : head(_head) {} HOSTDEVICE Dim() : head(0) {} @@ -86,11 +86,11 @@ struct Dim<1> { bool operator!=(const Dim<1>& o) const { return !(*this == o); } HOSTDEVICE - int& operator[](int idx); + int64_t& operator[](int idx); HOSTDEVICE - int operator[](int idx) const; + int64_t operator[](int idx) const; - int head; + int64_t head; }; namespace { @@ -100,12 +100,12 @@ template struct DimGetter { // Return a copy if Dim is const template - HOSTDEVICE static int impl(const D& d) { + HOSTDEVICE static int64_t impl(const D& d) { return DimGetter::impl(d.tail); } // Return a reference if Dim is mutable template - HOSTDEVICE static int& impl(D& d) { + HOSTDEVICE static int64_t& impl(D& d) { return DimGetter::impl(d.tail); } }; @@ -115,18 +115,18 @@ template <> struct DimGetter<0> { // Return a copy if Dim is const template - HOSTDEVICE static int impl(const D& d) { + HOSTDEVICE static int64_t impl(const D& d) { return d.head; } // Return a reference if Dim is mutable template - HOSTDEVICE static int& impl(D& d) { + HOSTDEVICE static int64_t& impl(D& d) { return d.head; } }; template -HOSTDEVICE int& indexer(Dim& dim, int idx) { +HOSTDEVICE int64_t& indexer(Dim& dim, int idx) { #ifndef __CUDA_ARCH__ if (idx < 0) { throw std::invalid_argument("Tried to access a negative dimension"); @@ -141,7 +141,7 @@ HOSTDEVICE int& indexer(Dim& dim, int idx) { } template <> -HOSTDEVICE int& indexer<1>(Dim<1>& dim, int idx) { +HOSTDEVICE int64_t& indexer<1>(Dim<1>& dim, int idx) { #ifndef __CUDA_ARCH__ if (idx != 0) { throw std::invalid_argument("Invalid index"); @@ -153,7 +153,7 @@ HOSTDEVICE int& indexer<1>(Dim<1>& dim, int idx) { } template -HOSTDEVICE int indexer(const Dim& dim, int idx) { +HOSTDEVICE int64_t indexer(const Dim& dim, int idx) { #ifndef __CUDA_ARCH__ if (idx < 0) { throw std::invalid_argument("Tried to access a negative dimension"); @@ -168,7 +168,7 @@ HOSTDEVICE int indexer(const Dim& dim, int idx) { } template <> -HOSTDEVICE int indexer<1>(const Dim<1>& dim, int idx) { +HOSTDEVICE int64_t indexer<1>(const Dim<1>& dim, int idx) { #ifndef __CUDA_ARCH__ if (idx != 0) { throw std::invalid_argument("Invalid index"); @@ -182,73 +182,76 @@ HOSTDEVICE int indexer<1>(const Dim<1>& dim, int idx) { } // namespace // Static access to constant Dim template -HOSTDEVICE int get(const Dim& d) { +HOSTDEVICE int64_t get(const Dim& d) { return DimGetter::impl(d); } // Static access to mutable Dim template -HOSTDEVICE int& get(Dim& d) { +HOSTDEVICE int64_t& get(Dim& d) { return DimGetter::impl(d); } // Dynamic access to constant Dim template -HOSTDEVICE int Dim::operator[](int i) const { +HOSTDEVICE int64_t Dim::operator[](int i) const { return indexer(*this, i); } // Dynamic access to mutable Dim template 
-HOSTDEVICE int& Dim::operator[](int i) { +HOSTDEVICE int64_t& Dim::operator[](int i) { return indexer(*this, i); } // Dynamic access to constant Dim -inline HOSTDEVICE int Dim<1>::operator[](int i) const { +inline HOSTDEVICE int64_t Dim<1>::operator[](int i) const { return indexer(*this, i); } // Dynamic access to mutable Dim -inline HOSTDEVICE int& Dim<1>::operator[](int i) { return indexer(*this, i); } +inline HOSTDEVICE int64_t& Dim<1>::operator[](int i) { + return indexer(*this, i); +} // Dynamic access to constant Dim // without std::enable_if will try to instantiate this on get<0>(d) template -HOSTDEVICE typename std::enable_if<(l > 0), int>::type get(const Dim& d, - int i) { +HOSTDEVICE typename std::enable_if<(l > 0), int64_t>::type get(const Dim& d, + int i) { return d[i]; } // Dynamic access to mutable Dim template -HOSTDEVICE typename std::enable_if<(l > 0), int&>::type get(Dim& d, int i) { +HOSTDEVICE typename std::enable_if<(l > 0), int64_t&>::type get(Dim& d, + int i) { return d[i]; } // Dot product of two dims template -HOSTDEVICE int linearize(const Dim& a, const Dim& b) { +HOSTDEVICE int64_t linearize(const Dim& a, const Dim& b) { return a.head * b.head + linearize(a.tail, b.tail); } // Base case dot product of two Dims // Notice it is inline because it is no longer a template template <> -HOSTDEVICE inline int linearize(const Dim<1>& a, const Dim<1>& b) { +HOSTDEVICE inline int64_t linearize(const Dim<1>& a, const Dim<1>& b) { return a.head * b.head; } // Product of a Dim template -HOSTDEVICE int product(const Dim& a, int prod = 1) { +HOSTDEVICE int64_t product(const Dim& a, int prod = 1) { return prod * a.head * product(a.tail); } // Base case product of a Dim // Notice it is inline because it is no longer a template template <> -HOSTDEVICE inline int product(const Dim<1>& a, int prod) { +HOSTDEVICE inline int64_t product(const Dim<1>& a, int prod) { return prod * a.head; } diff --git a/paddle/framework/dim_test.cu b/paddle/framework/dim_test.cu index 3898d0a447..f42b1eeadf 100644 --- a/paddle/framework/dim_test.cu +++ b/paddle/framework/dim_test.cu @@ -47,9 +47,9 @@ TEST(Dim, Equality) { EXPECT_EQ(b[1], 11); // dynamic access on GPU - thrust::device_vector r(1); + thrust::device_vector r(1); dyn_idx_gpu<<<1, 1>>>(thrust::raw_pointer_cast(r.data())); - int res = r[0]; + int64_t res = r[0]; EXPECT_EQ(res, 6); // ex_prefix_mul diff --git a/paddle/framework/eigen.h b/paddle/framework/eigen.h index a4667cc51f..2d8d9ae10c 100644 --- a/paddle/framework/eigen.h +++ b/paddle/framework/eigen.h @@ -28,7 +28,7 @@ struct EigenDim { static Type From(const DDim& dims) { PADDLE_ENFORCE(arity(dims) == D, "D must match arity(DDim)"); Type ret; - for (int d = 0; d < arity(dims); d++) { + for (int64_t d = 0; d < arity(dims); d++) { ret[d] = dims[d]; } return ret; diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 7893e233b7..94f436294f 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -58,7 +58,7 @@ inline T* Tensor::mutable_data(platform::Place place) { "Tensor's numel must be larger than zero to call " "Tensor::mutable_data. 
Call Tensor::set_dim first."); /* some versions of boost::variant don't have operator!= */ - size_t size = product(dims_) * sizeof(T); + int64_t size = product(dims_) * sizeof(T); if (holder_ == nullptr || !(holder_->place() == place) || holder_->size() < size + offset_) { if (platform::is_cpu_place(place)) { @@ -131,7 +131,7 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { PADDLE_ENFORCE_LT(begin_idx, end_idx, "Begin index must be less than end index."); PADDLE_ENFORCE_NE(dims_[0], 1, "Can not slice a tensor with dims_[0] = 1."); - int base = product(dims_) / dims_[0]; + size_t base = product(dims_) / dims_[0]; Tensor dst; dst.holder_ = holder_; DDim dst_dims = dims_; diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 056447901d..583c9e5774 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -31,8 +31,8 @@ class CPUGaussianRandomKernel : public framework::OpKernel { } engine.seed(seed); std::normal_distribution dist(mean, std); - ssize_t size = framework::product(tensor->dims()); - for (ssize_t i = 0; i < size; ++i) { + int64_t size = framework::product(tensor->dims()); + for (int64_t i = 0; i < size; ++i) { data[i] = dist(engine); } } @@ -46,9 +46,13 @@ class GaussianRandomOp : public framework::OperatorWithKernel { void InferShape(const framework::InferShapeContext& context) const override { auto* tensor = context.Output("Out"); auto dims = GetAttr>("dims"); + std::vector temp(dims.size()); + for (auto dim : dims) { + temp.push_back(static_cast(dim)); + } PADDLE_ENFORCE(dims.size() > 0UL, "dims can be one int or array. dims must be set."); - tensor->Resize(framework::make_ddim(dims)); + tensor->Resize(framework::make_ddim(temp)); } }; diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index a9b65c30f2..69e723b401 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -61,7 +61,7 @@ void ConcatOutputs(const std::vector& step_scopes, PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope", outlinks[i].internal); f::DDim step_dims = step_scope_var->template GetMutable()->dims(); - std::vector dims_vec = vectorize(step_dims); + std::vector dims_vec = vectorize(step_dims); dims_vec.insert(dims_vec.begin(), seq_len); output->Resize(f::make_ddim(dims_vec)); } else { diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 2d943c4508..3241d4289a 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -35,8 +35,8 @@ class CPUUniformRandomKernel : public framework::OpKernel { std::uniform_real_distribution dist( static_cast(context.GetAttr("min")), static_cast(context.GetAttr("max"))); - ssize_t size = framework::product(tensor->dims()); - for (ssize_t i = 0; i < size; ++i) { + int64_t size = framework::product(tensor->dims()); + for (int64_t i = 0; i < size; ++i) { data[i] = dist(engine); } } @@ -52,7 +52,11 @@ class UniformRandomOp : public framework::OperatorWithKernel { "uniform_random's min must less then max"); auto* tensor = ctx.Output("Out"); auto dims = GetAttr>("dims"); - tensor->Resize(framework::make_ddim(dims)); + std::vector temp(dims.size()); + for (auto dim : dims) { + temp.push_back(static_cast(dim)); + } + tensor->Resize(framework::make_ddim(temp)); } }; diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 6896422617..7db629a68f 100644 --- 
a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -76,7 +76,7 @@ PYBIND11_PLUGIN(core) { .def("get_dims", [](const Tensor &self) { return vectorize(self.dims()); }) .def("set_dims", - [](Tensor &self, const std::vector &dim) { + [](Tensor &self, const std::vector &dim) { self.Resize(make_ddim(dim)); }) .def("alloc_float", diff --git a/paddle/pybind/tensor_py.h b/paddle/pybind/tensor_py.h index 39ba60b4dc..95171acf72 100644 --- a/paddle/pybind/tensor_py.h +++ b/paddle/pybind/tensor_py.h @@ -85,7 +85,7 @@ void PyCPUTensorSetFromArray( framework::Tensor &self, py::array_t array, paddle::platform::CPUPlace &place) { - std::vector dims; + std::vector dims; dims.reserve(array.ndim()); for (size_t i = 0; i < array.ndim(); ++i) { dims.push_back((int)array.shape()[i]); @@ -102,7 +102,7 @@ void PyCUDATensorSetFromArray( framework::Tensor &self, py::array_t array, paddle::platform::GPUPlace &place) { - std::vector dims; + std::vector dims; dims.reserve(array.ndim()); for (size_t i = 0; i < array.ndim(); ++i) { dims.push_back((int)array.shape()[i]); From 857cd24a35bef9c182292ca709fe1dd9d0e52e19 Mon Sep 17 00:00:00 2001 From: qijun Date: Wed, 6 Sep 2017 12:18:23 +0800 Subject: [PATCH 21/26] fix dim_test.cu bug --- paddle/framework/dim_test.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/dim_test.cu b/paddle/framework/dim_test.cu index f42b1eeadf..0a6a87669c 100644 --- a/paddle/framework/dim_test.cu +++ b/paddle/framework/dim_test.cu @@ -8,7 +8,7 @@ __global__ void test(paddle::framework::Dim<2>* o) { o[0] = paddle::framework::make_dim(5, 6); } -__global__ void dyn_idx_gpu(int* o) { +__global__ void dyn_idx_gpu(int64_t* o) { auto d = paddle::framework::make_dim(5, 6); o[0] = d[1]; } From a5f1e6d6bdd094253cd6f47bc7bbefb1fbc3b1e4 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 6 Sep 2017 12:58:00 +0800 Subject: [PATCH 22/26] Update cos_sim operator by following reviewer's comments. 
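
The updated kernel computes, per row, z = sum(x*y) / (|x|*|y|) and the matching gradients. A minimal NumPy sketch of those formulas (illustrative only; function names and shapes are assumptions — the real implementation is the Eigen code in cos_sim_op.h below):

```python
import numpy as np

def cos_sim_forward(x, y):
    # x, y: [batch, feature]; returns per-row cosine similarity plus the norms
    x_norm = np.sqrt((x * x).sum(axis=1, keepdims=True))
    y_norm = np.sqrt((y * y).sum(axis=1, keepdims=True))
    z = (x * y).sum(axis=1, keepdims=True) / x_norm / y_norm
    return z, x_norm, y_norm

def cos_sim_backward(x, y, z, x_norm, y_norm, dz):
    # element-wise form of dL/dx and dL/dy, mirroring the broadcasted Eigen expressions
    norm_prod = x_norm * y_norm
    dx = dz * (y / norm_prod - z * x / (x_norm * x_norm))
    dy = dz * (x / norm_prod - z * y / (y_norm * y_norm))
    return dx, dy
```

Because either gradient may be skipped (see the `if (output_grad_x)` / `if (output_grad_y)` guards and the `no_grad_set` tests), each branch is computed independently.
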
--- paddle/operators/cos_sim_op.cc | 4 +- paddle/operators/cos_sim_op.h | 97 ++++++++++--------- .../v2/framework/tests/test_cos_sim_op.py | 38 +++++--- 3 files changed, 76 insertions(+), 63 deletions(-) diff --git a/paddle/operators/cos_sim_op.cc b/paddle/operators/cos_sim_op.cc index 3760d0b161..c033af3b74 100644 --- a/paddle/operators/cos_sim_op.cc +++ b/paddle/operators/cos_sim_op.cc @@ -90,8 +90,8 @@ class CosSimOpGrad : public framework::OperatorWithKernel { auto *x_grad = ctx.Output(framework::GradVarName("X")); auto *y_grad = ctx.Output(framework::GradVarName("Y")); - x_grad->Resize(x_dims); - y_grad->Resize(y_dims); + if (x_grad) x_grad->Resize(x_dims); + if (y_grad) y_grad->Resize(y_dims); } }; diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index 69d35d8bc2..9e3ff26815 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -28,30 +28,30 @@ template class CosSimKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* y = context.Input("Y"); - auto* z = context.Output("Out"); - auto* x_norm = context.Output("XNorm"); - auto* y_norm = context.Output("YNorm"); + auto* input_x = context.Input("X"); + auto* input_y = context.Input("Y"); + auto* output_z = context.Output("Out"); + auto* output_x_norm = context.Output("XNorm"); + auto* output_y_norm = context.Output("YNorm"); - z->mutable_data(context.GetPlace()); - x_norm->mutable_data(context.GetPlace()); - y_norm->mutable_data(context.GetPlace()); + output_z->mutable_data(context.GetPlace()); + output_x_norm->mutable_data(context.GetPlace()); + output_y_norm->mutable_data(context.GetPlace()); - auto dims = x->dims(); + auto dims = input_x->dims(); int size = static_cast(framework::product(dims)); auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); - auto X = EigenMatrix::From(*x, new_dims); - auto Y = EigenMatrix::From(*y, new_dims); - auto Z = EigenMatrix::From(*z); - auto XNorm = EigenMatrix::From(*x_norm); - auto YNorm = EigenMatrix::From(*y_norm); + auto x = EigenMatrix::From(*input_x, new_dims); + auto y = EigenMatrix::From(*input_y, new_dims); + auto z = EigenMatrix::From(*output_z); + auto x_norm = EigenMatrix::From(*output_x_norm); + auto y_norm = EigenMatrix::From(*output_y_norm); auto place = context.GetEigenDevice(); - auto XY = (X * Y).sum(Eigen::array({1})); - XNorm.device(place) = (X * X).sum(Eigen::array({1})).sqrt(); - YNorm.device(place) = (Y * Y).sum(Eigen::array({1})).sqrt(); - Z.device(place) = XY / XNorm / YNorm; + auto xy = (x * y).sum(Eigen::array({1})); + x_norm.device(place) = x.square().sum(Eigen::array({1})).sqrt(); + y_norm.device(place) = y.square().sum(Eigen::array({1})).sqrt(); + z.device(place) = xy / x_norm / y_norm; } }; @@ -59,41 +59,44 @@ template class CosSimGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* y = context.Input("Y"); - auto* z = context.Input("Out"); - auto* x_norm = context.Input("XNorm"); - auto* y_norm = context.Input("YNorm"); - auto* grad_x = context.Output(framework::GradVarName("X")); - auto* grad_y = context.Output(framework::GradVarName("Y")); - auto* grad_z = context.Input(framework::GradVarName("Out")); + auto* input_x = context.Input("X"); + auto* input_y = context.Input("Y"); + auto* input_z = context.Input("Out"); + auto* input_x_norm = context.Input("XNorm"); + auto* input_y_norm = 
context.Input("YNorm"); + auto* output_grad_x = context.Output(framework::GradVarName("X")); + auto* output_grad_y = context.Output(framework::GradVarName("Y")); + auto* input_grad_z = context.Input(framework::GradVarName("Out")); - grad_x->mutable_data(context.GetPlace()); - grad_y->mutable_data(context.GetPlace()); - - auto dims = x->dims(); + auto dims = input_x->dims(); int size = static_cast(framework::product(dims)); auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); - auto X = EigenMatrix::From(*x, new_dims); - auto Y = EigenMatrix::From(*y, new_dims); - auto Z = EigenMatrix::From(*z); - auto X_norm = EigenMatrix::From(*x_norm); - auto Y_norm = EigenMatrix::From(*y_norm); - auto dX = EigenMatrix::From(*grad_x, new_dims); - auto dY = EigenMatrix::From(*grad_y, new_dims); - auto dZ = EigenMatrix::From(*grad_z); + auto x = EigenMatrix::From(*input_x, new_dims); + auto y = EigenMatrix::From(*input_y, new_dims); + auto z = EigenMatrix::From(*input_z); + auto x_norm = EigenMatrix::From(*input_x_norm); + auto y_norm = EigenMatrix::From(*input_y_norm); + auto dz = EigenMatrix::From(*input_grad_z); Eigen::DSizes bcast(1, new_dims[1]); - auto Z_bcast = Z.broadcast(bcast); - auto dZ_bcast = dZ.broadcast(bcast); + auto z_bcast = z.broadcast(bcast); + auto dz_bcast = dz.broadcast(bcast); auto place = context.GetEigenDevice(); - auto X_snorm_bcast = X_norm.square().eval().broadcast(bcast); - auto Y_snorm_bcast = Y_norm.square().eval().broadcast(bcast); - auto norm_prod_bcast = (X_norm * Y_norm).eval().broadcast(bcast); - dX.device(place) = - dZ_bcast * (Y / norm_prod_bcast - Z_bcast * X / X_snorm_bcast); - dY.device(place) = - dZ_bcast * (X / norm_prod_bcast - Z_bcast * Y / Y_snorm_bcast); + auto x_snorm_bcast = x_norm.square().eval().broadcast(bcast); + auto y_snorm_bcast = y_norm.square().eval().broadcast(bcast); + auto norm_prod_bcast = (x_norm * y_norm).eval().broadcast(bcast); + if (output_grad_x) { + output_grad_x->mutable_data(context.GetPlace()); + auto dx = EigenMatrix::From(*output_grad_x, new_dims); + dx.device(place) = + dz_bcast * (y / norm_prod_bcast - z_bcast * x / x_snorm_bcast); + } + if (output_grad_y) { + output_grad_y->mutable_data(context.GetPlace()); + auto dy = EigenMatrix::From(*output_grad_y, new_dims); + dy.device(place) = + dz_bcast * (x / norm_prod_bcast - z_bcast * y / y_snorm_bcast); + } } }; diff --git a/python/paddle/v2/framework/tests/test_cos_sim_op.py b/python/paddle/v2/framework/tests/test_cos_sim_op.py index a19be47f76..32013a7999 100644 --- a/python/paddle/v2/framework/tests/test_cos_sim_op.py +++ b/python/paddle/v2/framework/tests/test_cos_sim_op.py @@ -24,26 +24,36 @@ class TestCosSimOp(unittest.TestCase): } -class CosSimGradOpTest(GradientChecker): - def test_cos_sim_2d(self): - op = create_op("cos_sim") - inputs = { +class TestCosSimGradOp(GradientChecker): + def setUp(self): + self.op = create_op("cos_sim") + self.inputs = { 'X': np.random.random((10, 5)).astype("float32"), 'Y': np.random.random((10, 5)).astype("float32") } - self.compare_grad(op, inputs) + + def test_cpu_gpu_compare(self): + self.compare_grad(self.op, self.inputs) + + def test_normal(self): self.check_grad( - op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.05) + self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.05) - def test_cos_sim_3d(self): - op = create_op("cos_sim") - inputs = { - 'X': np.random.random((10, 5, 2)).astype("float32"), - 'Y': np.random.random((10, 5, 2)).astype("float32") - } - self.compare_grad(op, inputs) + def test_ignore_x(self): 
+ self.check_grad( + self.op, + self.inputs, ["Y"], + "Out", + max_relative_error=0.05, + no_grad_set={"X"}) + + def test_ignore_y(self): self.check_grad( - op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.05) + self.op, + self.inputs, ["X"], + "Out", + max_relative_error=0.05, + no_grad_set={"Y"}) if __name__ == '__main__': From 52f2bc1f9d84a05f1c73428124a1235fb141f193 Mon Sep 17 00:00:00 2001 From: qijun Date: Wed, 6 Sep 2017 13:34:01 +0800 Subject: [PATCH 23/26] fix vector size error --- paddle/operators/gaussian_random_op.cc | 3 ++- paddle/operators/uniform_random_op.cc | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 583c9e5774..8bb61275ba 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -46,7 +46,8 @@ class GaussianRandomOp : public framework::OperatorWithKernel { void InferShape(const framework::InferShapeContext& context) const override { auto* tensor = context.Output("Out"); auto dims = GetAttr>("dims"); - std::vector temp(dims.size()); + std::vector temp; + temp.reserve(dims.size()); for (auto dim : dims) { temp.push_back(static_cast(dim)); } diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 3241d4289a..40cef8942a 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -52,7 +52,8 @@ class UniformRandomOp : public framework::OperatorWithKernel { "uniform_random's min must less then max"); auto* tensor = ctx.Output("Out"); auto dims = GetAttr>("dims"); - std::vector temp(dims.size()); + std::vector temp; + temp.reserve(dims.size()); for (auto dim : dims) { temp.push_back(static_cast(dim)); } From 1e419d9ed3610c95b518982a026666ea3f699f27 Mon Sep 17 00:00:00 2001 From: qijun Date: Wed, 6 Sep 2017 17:24:47 +0800 Subject: [PATCH 24/26] fix clang build and run error --- paddle/operators/cos_sim_op.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index 9e3ff26815..9e2bcebe3b 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -23,6 +23,9 @@ using Tensor = framework::Tensor; template using EigenMatrix = framework::EigenMatrix; +template +using EigenVector = framework::EigenVector; template class CosSimKernel : public framework::OpKernel { @@ -43,14 +46,14 @@ class CosSimKernel : public framework::OpKernel { auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); auto x = EigenMatrix::From(*input_x, new_dims); auto y = EigenMatrix::From(*input_y, new_dims); - auto z = EigenMatrix::From(*output_z); - auto x_norm = EigenMatrix::From(*output_x_norm); - auto y_norm = EigenMatrix::From(*output_y_norm); + auto z = EigenVector::Flatten(*output_z); + auto x_norm = EigenVector::Flatten(*output_x_norm); + auto y_norm = EigenVector::Flatten(*output_y_norm); auto place = context.GetEigenDevice(); - auto xy = (x * y).sum(Eigen::array({1})); - x_norm.device(place) = x.square().sum(Eigen::array({1})).sqrt(); - y_norm.device(place) = y.square().sum(Eigen::array({1})).sqrt(); + auto xy = (x * y).sum(Eigen::array({{1}})); + x_norm.device(place) = x.square().sum(Eigen::array({{1}})).sqrt(); + y_norm.device(place) = y.square().sum(Eigen::array({{1}})).sqrt(); z.device(place) = xy / x_norm / y_norm; } }; From 4373a4a74d0d30c2980752b10e7bce336d434f74 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Wed, 6 Sep 2017 18:24:08 
+0800 Subject: [PATCH 25/26] fix redeclaration of 'paddle::StatPtr __stat' --- paddle/gserver/layers/Conv3DLayer.cpp | 6 ++---- paddle/gserver/layers/DeConv3DLayer.cpp | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp index 3887aa58b2..9deda2de98 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -83,8 +83,8 @@ void Conv3DLayer::forward(PassType passType) { int outWidth = getSize(); resetOutput(batchSize, outWidth); + REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); const MatrixPtr &inMat = getInputValue(i); const MatrixPtr &outMat = getOutputValue(); int M = M_[i]; @@ -120,7 +120,6 @@ void Conv3DLayer::forward(PassType passType) { } } if (nullptr != this->biasParameter_) { - REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); this->addBias(); } forwardActivation(); @@ -134,15 +133,14 @@ void Conv3DLayer::backward(const UpdateCallback &callback) { biases_->getParameterPtr()->incUpdate(callback); } + REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); if (weights_[i]->getWGrad()) { bpropWeights(i); } if (getInputGrad(i)) { bpropData(i); } - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); weights_[i]->getParameterPtr()->incUpdate(callback); } } diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index 2838980a97..1b59ed60c5 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -84,8 +84,8 @@ void DeConv3DLayer::forward(PassType passType) { resetOutput(batchSize, outWidth); const MatrixPtr outMat = getOutputValue(); + REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str()); for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str()); const MatrixPtr &inMat = getInputValue(i); int M = M_[i]; int N = N_[i]; @@ -120,7 +120,6 @@ void DeConv3DLayer::forward(PassType passType) { } } if (nullptr != this->biasParameter_) { - REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); this->addBias(); } forwardActivation(); @@ -133,12 +132,12 @@ void DeConv3DLayer::backward(const UpdateCallback &callback) { bpropBiases(); biases_->getParameterPtr()->incUpdate(callback); } + REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str()); for (size_t i = 0; i < inputLayers_.size(); ++i) { if (weights_[i]->getWGrad() || this->needGradient_) { int M = M_[i]; int N = N_[i]; int K = K_[i]; - REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str()); Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); const MatrixPtr &inMat = getInputValue(i); for (int n = 0; n < batchSize; ++n) { @@ -182,7 +181,6 @@ void DeConv3DLayer::backward(const UpdateCallback &callback) { } } } - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); weights_[i]->getParameterPtr()->incUpdate(callback); } } From db5e726d160789f05fb8323c6915991e5a276339 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 6 Sep 2017 18:14:27 +0800 Subject: [PATCH 26/26] update the how to write op doc. 
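The `REGISTER_TIMER_INFO` changes in [PATCH 25/26] above hoist the timer out of the per-input loops and drop the second timer in the same scope. That matters because the macro declares a timer variable with a fixed name in the current scope (the commit title mentions `paddle::StatPtr __stat`), so two uses in one scope do not compile. A minimal sketch of that failure mode — the `StatPtr` body and macro below are illustrative stand-ins, not Paddle's actual implementation:

```cpp
#include <string>

// Illustrative stand-in for the timer handle started by the macro.
struct StatPtr {
  explicit StatPtr(const std::string& name) { /* start timing `name` */ }
};

// A macro in this style always declares a variable named `__stat`.
#define REGISTER_TIMER_INFO_DEMO(name) StatPtr __stat(name)

void backwardLoopBody() {
  REGISTER_TIMER_INFO_DEMO("BwdConv3D");
  // ... backprop for one input ...
  // REGISTER_TIMER_INFO_DEMO("WeightUpdate");  // error: redeclaration of '__stat' in this scope
  // Hoisting the timer out of the loop (as the patch does) avoids the clash.
}
```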
--- doc/howto/dev/new_op_cn.md | 183 +++++++++++++++++++++---------------- 1 file changed, 102 insertions(+), 81 deletions(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index 0d29865447..58665e9f2b 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -45,7 +45,9 @@ Kernel实现 | CPU、GPU共享Kernel实现在`.h`文件中,否则,CPU ### 1. 定义ProtoMaker类 -矩阵乘的公式:$Out = X * Y$, 可见该计算由两个输入,一个输出组成。首先定义`ProtoMaker`来描述该Op的输入、输出及注释: +矩阵乘法的公式:$Out = X * Y$, 可见该计算由两个输入,一个输出组成。 + +首先定义`ProtoMaker`来描述该Op的输入、输出,并添加注释: ```cpp class MulOpMaker : public framework::OpProtoAndCheckerMaker { @@ -63,17 +65,17 @@ The equation is: Out = X * Y }; ``` -[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数包括2个参数: +[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数含有2个参数: - `framework::OpProto` : 前者存储Op的输入输出和参数属性,将用于Python API接口的生成。 - `framework::OpAttrChecker` :后者用于检查参数属性的合法性。 -构造函数里通过`AddInput`添加输入参数,通过`AddOutput`添加输出参数,通过`AddComment`添加该Op的注释,这些函数会将对应内容添加到`OpProto`中。 +构造函数里通过`AddInput`添加输入参数,通过`AddOutput`添加输出参数,通过`AddComment`添加Op的注释。这些函数会将对应内容添加到`OpProto`中。 -在`MulOp`中添加两个输入`X`和`Y`,添加了一个输出`Out`,并解释了各自含义,命名请遵守命名规范。 +上面的代码在`MulOp`中添加两个输入`X`和`Y`,添加了一个输出`Out`,并解释了各自含义,命名请遵守命名规范。 -再举个[`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)的例子: +再以[`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)为例: ```cpp template @@ -91,14 +93,16 @@ The equation is: Out = scale*X }; ``` - 这个例子有两处不同: +这个例子有两处不同: + +- `AddInput("X","...").NotInGradient()` : 表示`X`这个输入不参与`ScaleOp`对应的梯度Op计算之中,如果Op的某个输入不参与反向梯度的计算,请显示地调用`.NotInGradient()`进行设置。 - - `AddInput("X","...").NotInGradient()` : 表示`X`这个输入不参与`ScaleOp`对应的梯度Op计算之中。 - - `AddAttr("scale", "...").SetDefault(1.0);` : 增加`scale`系数,作为参数属性,并且设置默认值为1.0。 +- `AddAttr("scale", "...").SetDefault(1.0);` : 增加`scale`系数,作为参数属性,并且设置默认值为1.0。 ### 2. 定义Operator类 +下面的点实现了MulOp的定义: ```cpp class MulOp : public framework::OperatorWithKernel { @@ -143,14 +147,27 @@ MulOp(const std::string &type, const framework::VariableNameMap &inputs, - 1). 做检查, 尽早报错:检查输入数据维度、类型等是否合法。 - 2). 设置输出Tensor的形状。 -通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中,和要讲到的注册函数一起放在`.cc`中 +通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中,和下面将要介绍的注册函数一起放在`.cc`中 ### 3. 
定义OpKernel类

-```cpp
-template
-class MulKernel : public framework::OpKernel {
- public:
+`MulKernel`继承自`framework::OpKernel`,带有下面两个模板参数:
+
+- `typename Place`: 表示设备类型,不同设备(CPU、GPU)共享同一个Kernel时,需加该模板参数,不共享则不加,一个不共享的例子是[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。
+
+- `typename T` : 表示数据类型,如`float`, `double`等。
+
+需要为`MulKernel`类重写`Compute`接口。
+- `Compute`接受一个输入参数:`const framework::ExecutionContext& context`。
+- 与`InferShapeContext`相比,`ExecutionContext`增加了设备类型,同样可获取到输入输出和属性参数。
+- `Compute`函数里实现`OpKernel`的具体计算逻辑。
+
+下面是 `MulKernel` `Compute`的实现:
+
+  ```cpp
+  template
+  class MulKernel : public framework::OpKernel {
+   public:
    void Compute(const framework::ExecutionContext& context) const override {
      auto* X = context.Input("X");
      auto* Y = context.Input("Y");
@@ -160,50 +177,50 @@ class MulKernel : public framework::OpKernel {
          const_cast(context.device_context_);
      math::matmul(*X, false, *Y, false, 1, Z, 0, device_context);
    }
-};
-```
+  };
+  ```

-`MulKernel`继承自`framework::OpKernel`,带有模板参数:
+需要注意:**不同设备(CPU、GPU)共享一个Op定义,是否共享同一个`OpKernel`,取决于`Compute`调用的函数是否支持不同设备。**

- - `typename Place`: 表示设备类型,不同设备(CPU、GPU)共享同一个Kernel时,需加该模板参数,不共享则不加,一个不共享的例子是[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。
+`MulOp`的CPU、GPU实现共享同一个`Kernel`。`OpKernel`不共享的例子可以参考:[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。

- - `typename T` : 表示数据类型,如`float`, `double`等。
+为了使`OpKernel`的计算过程书写更加简单,并且CPU、GPU的代码可以复用,我们通常借助 Eigen unsupported Tensor模块来实现`Compute`接口。关于在PaddlePaddle中如何使用Eigen库,请参考[使用文档](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/use_eigen_cn.md)。

-`MulKernel`需要重写`Compute`接口,该接口参数为`const framework::ExecutionContext& context`, `ExecutionContext`相比`InferShapeContext`增加了设备类型,同样可获取到输入输出和属性参数,`Compute`函数里写具体实现时。
-注意,不同设备(CPU、GPU)共享一个Op定义,是否则共享同一个`OpKernel`,取决于`Compute`调用的函数是否支持不同设备。`MulOp`的CPU、GPU实现共享同一个`Kernel`,`OpKernel`不共享的例子可以参考[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。
-为了使得`OpKernel`的计算过程书写较为简单,CPU、GPU的代码可以复用,我们通常借助Eigen unsupported Tensor模块来实现。关于在paddle中如何使用Eigen库,请参考对应的使用[文档](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/use_eigen_cn.md)
-到此前向Op实现完成,需要在`.cc`文件中注册该op和kernel。反向Op类的定义和Kernel定义与前向Op类似,这里不再重复。但注意,反向Op没有`ProtoMaker`。
+到此,前向Op实现完成。接下来,需要在`.cc`文件中注册该op和kernel。
+反向Op类的定义,反向OpKernel的定义与前向Op类似,这里不再赘述。**但需注意反向Op没有`ProtoMaker`**。
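上面提到,`Compute` 的实现通常借助 Eigen unsupported Tensor 模块书写,以便 CPU、GPU 复用同一份代码。下面给出一个极简的示意(假设有一个逐元素求平方的 Op,输入名 `X`、输出名 `Out` 均为举例用的假设,并非 Paddle 中真实存在的 Op;写法与前文 cos_sim_op.h 中的 `EigenVector::Flatten`、`GetEigenDevice` 用法一致,仅作示意):

```cpp
// 示意:逐元素计算 Out = X * X,CPU、GPU 共用同一份 Kernel 实现
// (与其它 OpKernel 一样,假定已包含 framework 下的 eigen.h 与 op_registry.h 等头文件)
template <typename Place, typename T>
class SquareKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* in = context.Input<framework::Tensor>("X");
    auto* out = context.Output<framework::Tensor>("Out");
    out->mutable_data<T>(context.GetPlace());

    auto x = framework::EigenVector<T>::Flatten(*in);
    auto y = framework::EigenVector<T>::Flatten(*out);
    // 通过 device() 指定计算设备,同一表达式即可在 CPU / GPU 上执行
    y.device(context.GetEigenDevice<Place>()) = x * x;
  }
};
```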
### 4. 注册Operator

-在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。
+- 在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。

-```cpp
-namespace ops = paddle::operators;
-REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad);
-REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel);
-REGISTER_OP_CPU_KERNEL(mul_grad,
-                       ops::MulGradKernel);
-```
+  ```cpp
+  namespace ops = paddle::operators;
+  REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad);
+  REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel);
+  REGISTER_OP_CPU_KERNEL(mul_grad,
+                         ops::MulGradKernel);
+  ```
+
+  在上面的代码中:
+
+  - `REGISTER_OP` : 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`,注册`ops::MulOpGrad`,类型名为`mul_grad`。
+  - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。
+  - `REGISTER_OP_CPU_KERNEL` :注册`ops::MulKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::MulKernel`类。

- - `REGISTER_OP` : 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`,注册`ops::MulOpGrad`,类型名为`mul_grad`,
- - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。
- - `REGISTER_OP_CPU_KERNEL` :注册`ops::MulKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::MulKernel`类。

-在 `.cu`文件中注册GPU Kernel。请注意,如果GPU Kernel的实现是基于Eigen unsupported模块,那么在 `.cu`的最前面请加上宏定义 `#define EIGEN_USE_GPU`
+- 在 `.cu`文件中注册GPU Kernel。
+  - 请注意,如果GPU Kernel的实现基于Eigen unsupported模块,那么在 `.cu`的开始请加上宏定义 `#define EIGEN_USE_GPU`,代码示例如下:

-```cpp
-// if use Eigen unsupported module before include head files
-#define EIGEN_USE_GPU
-
-namespace ops = paddle::operators;
-REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel);
-REGISTER_OP_GPU_KERNEL(mul_grad,
-                       ops::MulGradKernel);
-```
+  ```cpp
+  // if use Eigen unsupported module before include head files
+  #define EIGEN_USE_GPU
+
+  namespace ops = paddle::operators;
+  REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel);
+  REGISTER_OP_GPU_KERNEL(mul_grad,
+                         ops::MulGradKernel);
+  ```

### 5.
编译 @@ -225,7 +242,7 @@ REGISTER_OP_GPU_KERNEL(mul_grad, - 绑定Python 在 [`paddle/pybind/pybind.cc -`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc)文件中添加该类: +`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc) 使用`USE_OP`告知编译器需要链接的Op,具体解释参考[代码注释](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h#L81)。 ``` USE_OP(mul); @@ -242,50 +259,54 @@ REGISTER_OP_GPU_KERNEL(mul_grad, USE_NO_KENREL_OP(recurrent); ``` - 使用`USE_OP`告知编译器需要链接该Op的目标文件,具体解释参考[代码注释](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h#L81)。 - - 生成库 - 无需修改 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会自动被添加链接到生成的lib库中。 + 无需修改 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会被自动添加链接到生成的lib库中。 ## 实现单元测试 -单测包括对比前向Op不同设备(CPU、GPU)的实现、对比反向OP不同设备(CPU、GPU)的实现、反向Op的梯度测试。下面介绍介绍[`MulOp`的单测](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py)。 +单测包括对比前向Op不同设备(CPU、GPU)的实现、对比反向OP不同设备(CPU、GPU)的实现、反向Op的梯度测试。下面介绍介绍[`MulOp`的单元测试](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py)。 ### 前向Operator单元测试 -前向Op单测继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`,具体单测流程在`OpTestMeta`里完成。需在`setUp`函数定义输入输出和属性参数,以及Python对比的输出值。 +前向Op单元测试继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`。各项更加具体的单元测试在`OpTestMeta`里完成。测试前向Operator,需要: -```python -import unittest -import numpy as np -from gradient_checker import GradientChecker, create_op -from op_test_util import OpTestMeta +1. 在`setUp`函数定义输入、输出,以及相关的属性参数。 +2. 生成随机的输入数据。 +3. 
在Python脚本中实现与前向operator相同的计算逻辑,得到输出值,与operator前向计算的输出进行对比。 -class TestMulOp(unittest.TestCase): - __metaclass__ = OpTestMeta - def setUp(self): - self.type = "mul" - self.inputs = { - 'X': np.random.random((32, 84)).astype("float32"), - 'Y': np.random.random((84, 100)).astype("float32") - } - self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} -``` - 首先需要`import`必要的包,下面详细解释其他值: + ```python + import unittest + import numpy as np + from gradient_checker import GradientChecker, create_op + from op_test_util import OpTestMeta + + class TestMulOp(unittest.TestCase): + __metaclass__ = OpTestMeta - - `self.type = "mul" ` : 定义类型,和注册的类型一致。 - - `self.inputs` : 定义输入,类型为Numpy.array,并初始化。 - - `self.outputs` : 定义输出,并得到Python结算结果。 + def setUp(self): + self.type = "mul" + self.inputs = { + 'X': np.random.random((32, 84)).astype("float32"), + 'Y': np.random.random((84, 100)).astype("float32") + } + self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} + ``` + +上面的代码首先导入依赖的包,下面是对`setUp`函数中操作的重要变量的详细解释: + +- `self.type = "mul" ` : 定义类型,与operator注册时注册的类型一致。 +- `self.inputs` : 定义输入,类型为`numpy.array`,并初始化。 +- `self.outputs` : 定义输出,并在Python脚本中完成与operator同样的计算逻辑,返回Python端的计算结果。 ### 反向Operator单元测试 -反向Op单测继承自`GradientChecker`,而`GradientChecker`集成自`unittest.TestCase`,所以反向单测函数需要`test_`开头。 +反向Op单元测试继承自`GradientChecker`,而`GradientChecker`继承自`unittest.TestCase`,因此,**反向单元测试函数需要以`test_`开头**。 -```cpp +```python class TestMulGradOp(GradientChecker): def setUp(self): self.op = create_op("mul") @@ -319,27 +340,27 @@ class TestMulGradOp(GradientChecker): no_grad_set={"Y"}) ``` -下面解释一些关键的地方: +下面解释代码中一些关键的地方: - - 调用`create_op("mul")`创建反向Op对应的前向Op。 - - 调用`compare_grad`函数对比CPU、GPU计算结果。 - - `test_normal`中调用`check_grad`检查梯度稳定性,这里采用数值法检测梯度正确性。 - - 第一个参数`self.op` : 前向Op。 - - 第二个参数`self.inputs` : 输入词典,词典的Key和`ProtoMaker`定义保持一致。 - - 第三个参数`["X", "Y"]` : 指定对输入变量`X`、`Y`做梯度检测。 - - 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out` - - `test_ignore_x`和`test_ignore_y`分支测试只需要计算一个输入梯度的情况。 +- 调用`create_op("mul")`创建反向Op对应的前向Op。 +- 调用`compare_grad`函数对比CPU、GPU计算结果。 +- `test_normal`中调用`check_grad`使用数值法检测梯度正确性和稳定性。 + - 第一个参数`self.op` : 前向Op。 + - 第二个参数`self.inputs` : 输入词典,词典的Key和`ProtoMaker`定义保持一致。 + - 第三个参数`["X", "Y"]` : 指定对输入变量`X`、`Y`做梯度检测。 + - 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out` +- `test_ignore_x`和`test_ignore_y`分支用来测试只需要计算一个输入梯度的情况。 ### 编译和执行单元测试 -单测完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)里添加以下内容将单测加入工程中: +单元测试编写完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)中添加以下内容,将单元测试加入工程: ``` py_test(test_mul_op SRCS test_mul_op.py) ``` -请注意,**不同于Op的编译测试,运行单元测试测时需要编译整个工程**,并且编译时需要打开`WITH_TESTING`, 即`cmake paddle_dir -DWITH_TESTING=ON`。编译成功后,执行下面的命令来运行单测: +请注意,**不同于Op的编译测试,运行单元测试测时需要编译整个工程**,并且编译时需要打开`WITH_TESTING`, 即`cmake paddle_dir -DWITH_TESTING=ON`。编译成功后,执行下面的命令来运行单元测试: ```bash make test ARGS="-R test_mul_op -V"
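# 上面的 make test 会把 ARGS 透传给 ctest;也可以在构建目录下直接运行(这里假设构建目录为 build):
cd build && ctest -R test_mul_op -V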