From 3e13b9122fef2ac8dc53bad340ad41902f2a4ccd Mon Sep 17 00:00:00 2001
From: caoying03
Date: Tue, 5 Sep 2017 12:18:51 +0800
Subject: [PATCH 01/51] add softmax_with_cost_op.

---
 paddle/operators/softmax_with_cost_op.cc      | 82 +++++++++++++++++++
 paddle/operators/softmax_with_cost_op.h       | 40 +++++++++
 paddle/pybind/pybind.cc                       |  1 +
 .../tests/test_softmax_with_cost_op.py        | 22 +++++
 4 files changed, 145 insertions(+)
 create mode 100644 paddle/operators/softmax_with_cost_op.cc
 create mode 100644 paddle/operators/softmax_with_cost_op.h
 create mode 100644 python/paddle/v2/framework/tests/test_softmax_with_cost_op.py

diff --git a/paddle/operators/softmax_with_cost_op.cc b/paddle/operators/softmax_with_cost_op.cc
new file mode 100644
index 0000000000..a4537691a0
--- /dev/null
+++ b/paddle/operators/softmax_with_cost_op.cc
@@ -0,0 +1,82 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/softmax_op.h"
+
+namespace paddle {
+namespace operators {
+
+class SoftmaxWithLossOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    auto logits = ctx.Input<Tensor>("logits");
+    PADDLE_ENFORCE(logits->dims().size() == 2UL,
+                   "The input of softmax_with_loss_op should be a 2-d tensor.");
+    PADDLE_ENFORCE(ctx.Input<Tensor>("lables")->dims().size() == 1UL,
+                   "The label should be a 1-d tensor.");
+    ctx.Output<Tensor>("loss")->Resize({logits->dims()[0]});
+  }
+};
+
+class SoftmaxWithLossOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SoftmaxWithLossOpMaker(framework::OpProto *proto,
+                         framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("logits",
+             "The unscaled log probabilities which is a 2-D tensor with"
+             "shape [N x K]. N is the batch_size, and K is the class number.");
+    AddInput("label", "The ground truth. A 1-D tensor with shape N.");
+    AddOutput("loss", "A 1-D tensor with shape N.");
+    AddComment(R"DOC(
+Cross entropy loss with softmax are used as the output layer extensively. This
+operator computes the softmax normalized values for each row of the input
+tensor, after which cross-entropy loss is then computed. This provides a more
+numerically stable gradient.
+
+Because this operators performs a softmax on logits internally, it expects
+unscaled logits. Please do not call this op with the output of softmax operator,
+which will produce incorrect results.
+
+This operators expects mutually exclusive hard labels, each sample in a batch
+is in exactly one class with probabilities 1. Each sample in the batch with one
+and only one label.
+)DOC"); + } +}; + +class SoftmaxWithLossOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override {} +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP(softmax, ops::SoftmaxWithLossOp, ops::SoftmaxWithLossOpMaker, + softmax_grad, ops::SoftmaxWithLossOpGrad); +REGISTER_OP_CPU_KERNEL( + softmax, ops::SoftmaxWithLossKernel); +REGISTER_OP_CPU_KERNEL( + softmax_grad, + ops::SoftmaxWithLossGradKernel); diff --git a/paddle/operators/softmax_with_cost_op.h b/paddle/operators/softmax_with_cost_op.h new file mode 100644 index 0000000000..fb544842b7 --- /dev/null +++ b/paddle/operators/softmax_with_cost_op.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenMatrix = framework::EigenMatrix; + +template +class SoftmaxWithLossKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override {} +}; + +template +class SoftmaxWithLossGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override {} +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 6896422617..e86f4dfe26 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -37,6 +37,7 @@ USE_OP(mul); USE_OP(mean); USE_OP(sigmoid); USE_OP(softmax); +USE_OP(softmax_with_loss); USE_OP(rowwise_add); USE_OP(fill_zeros_like); USE_NO_KERNEL_OP(recurrent); diff --git a/python/paddle/v2/framework/tests/test_softmax_with_cost_op.py b/python/paddle/v2/framework/tests/test_softmax_with_cost_op.py new file mode 100644 index 0000000000..f7b9f54a91 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_softmax_with_cost_op.py @@ -0,0 +1,22 @@ +import unittest + +import numpy as np + +from gradient_checker import GradientChecker, create_op +from op_test_util import OpTestMeta + + +class TestSoftmaxWithLossOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + pass + + +class SoftmaxWithLossGradOpTest(GradientChecker): + def test_softmax(self): + pass + + +if __name__ == '__main__': + unittest.main() From 513bc99702e8d8fd36c34de3aa813c0229442d6b Mon Sep 17 00:00:00 2001 From: caoying03 Date: Fri, 8 Sep 2017 18:49:40 +0800 Subject: [PATCH 02/51] softmax with cross entropy as a cost operator. 
--- paddle/operators/softmax_with_cost_op.cc | 82 -------------- .../softmax_with_cross_entropy_op.cc | 102 ++++++++++++++++++ .../softmax_with_cross_entropy_op.cu | 25 +++++ ...t_op.h => softmax_with_cross_entropy_op.h} | 22 ++-- paddle/pybind/pybind.cc | 2 +- 5 files changed, 139 insertions(+), 94 deletions(-) delete mode 100644 paddle/operators/softmax_with_cost_op.cc create mode 100644 paddle/operators/softmax_with_cross_entropy_op.cc create mode 100644 paddle/operators/softmax_with_cross_entropy_op.cu rename paddle/operators/{softmax_with_cost_op.h => softmax_with_cross_entropy_op.h} (51%) diff --git a/paddle/operators/softmax_with_cost_op.cc b/paddle/operators/softmax_with_cost_op.cc deleted file mode 100644 index a4537691a0..0000000000 --- a/paddle/operators/softmax_with_cost_op.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -:A -limitations under the License. */ - -#include "paddle/operators/softmax_op.h" - -namespace paddle { -namespace operators { - -class SoftmaxWithLossOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(const framework::InferShapeContext &ctx) const override { - auto logits = ctx.Input("logits"); - PADDLE_ENFORCE(logits->dims().size() == 2UL, - "The input of softmax_with_loss_op should be a 2-d tensor."); - PADDLE_ENFORCE(ctx.Input("lables")->dims().size() == 1UL, - "The label should be a 1-d tensor."); - ctx.Output("loss")->Resize({logits->dims()[0]}); - } -}; - -class SoftmaxWithLossOpMaker : public framework::OpProtoAndCheckerMaker { - public: - SoftmaxWithLossOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("logits", - "The unscaled log probabilities which is a 2-D tensor with" - "shape [N x K]. N is the batch_size, and K is the class number."); - AddInput("label", "The ground truth. A 1-D tensor with shape N."); - AddOutput("loss", "A 1-D tensor with shape N."); - AddComment(R"DOC( -Cross entropy loss with softmax are used as the output layer extensively. This -operator computes the softmax normalized values for each row of the input -tensor, after which cross-entropy loss is then computed. This provides a more -numerically stable gradient. - -Because this operators performs a softmax on logits internally, it expects -unscaled logits. Please do not call this op with the output of softmax operator, -which will produce incorrect results. - -This operators expects mutually exclusive hard labels, each sample in a batch -is in exactly one class with probabilities 1. Each sample in the batch with one -and only one label. 
-)DOC"); - } -}; - -class SoftmaxWithLossOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(const framework::InferShapeContext &ctx) const override {} -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP(softmax, ops::SoftmaxWithLossOp, ops::SoftmaxWithLossOpMaker, - softmax_grad, ops::SoftmaxWithLossOpGrad); -REGISTER_OP_CPU_KERNEL( - softmax, ops::SoftmaxWithLossKernel); -REGISTER_OP_CPU_KERNEL( - softmax_grad, - ops::SoftmaxWithLossGradKernel); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc new file mode 100644 index 0000000000..2edf00766e --- /dev/null +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -0,0 +1,102 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/softmax_with_cross_entropy_op.h" + +namespace paddle { +namespace operators { + +class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto logits = ctx.Input("logits"); + PADDLE_ENFORCE( + logits->dims().size() == 2UL, + "The input of softmax_with_cross_entropy should be a 2-d tensor."); + PADDLE_ENFORCE(ctx.Input("lables")->dims().size() == 1UL, + "The label should be a 1-d tensor."); + ctx.Output("Y")->Resize({logits->dims()[0]}); + } +}; + +class SoftmaxWithCrossEntropyOpMaker + : public framework::OpProtoAndCheckerMaker { + public: + SoftmaxWithCrossEntropyOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("logits", + "The unscaled log probabilities which is a 2-D tensor with" + "shape [N x K]. N is the batch_size, and K is the class number."); + AddInput("label", "The ground truth. A 1-D tensor with shape N."); + AddOutput("Y", "A 1-D tensor with shape N."); + AddComment(R"DOC( +Cross entropy loss with softmax are used as the output layer extensively. This +operator computes the softmax normalized values for each row of the input +tensor, after which cross-entropy loss is then computed. This provides a more +numerically stable gradient. + +Because this operators performs a softmax on logits internally, it expects +unscaled logits. Please do not call this op with the output of softmax operator, +which will produce incorrect results. + +This operators expects mutually exclusive hard labels, each sample in a batch +is in exactly one class with probabilities 1. Each sample in the batch with one +and only one label. 
+)DOC"); + } +}; + +class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should be not null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")), + "Input(Y@GRAD) should be not null."); + PADDLE_ENFORCE_EQ(ctx.Input("Y")->dims(), + ctx.Input(framework::GradVarName("Y"))->dims(), + "Input(Y) and its gradients should have a same shape."); + + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("labels"), + "Input(lables) should be not null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("logits")), + "Input(logits@GRAD) should be not null."); + PADDLE_ENFORCE_EQ( + ctx.Input("logits")->dims(), + ctx.Input(framework::GradVarName("logits"))->dims(), + "Input(logits) and its gradients should have a same shape."); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP(softmax_with_cross_entropy, ops::SoftmaxWithCrossEntropyOp, + ops::SoftmaxWithCrossEntropyOpMaker, + softmax_with_cross_entropy_grad, + ops::SoftmaxWithCrossEntropyOpGrad); +REGISTER_OP_CPU_KERNEL( + softmax_with_cross_entropy, + ops::SoftmaxWithCrossEntropyKernel); +REGISTER_OP_CPU_KERNEL( + softmax_with_cross_entropy_grad, + ops::SoftmaxWithCrossEntropyGradKernel); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cu b/paddle/operators/softmax_with_cross_entropy_op.cu new file mode 100644 index 0000000000..c9d47cc4aa --- /dev/null +++ b/paddle/operators/softmax_with_cross_entropy_op.cu @@ -0,0 +1,25 @@ +/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "softmax_with_cross_entropy_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL( + softmax_with_cross_entropy, + ops::SoftmaxWithCrossEntropyKernel); +REGISTER_OP_GPU_KERNEL( + softmax_with_cross_entropy_grad, + ops::SoftmaxWithCrossEntropyGradKernel); diff --git a/paddle/operators/softmax_with_cost_op.h b/paddle/operators/softmax_with_cross_entropy_op.h similarity index 51% rename from paddle/operators/softmax_with_cost_op.h rename to paddle/operators/softmax_with_cross_entropy_op.h index fb544842b7..418fb540b8 100644 --- a/paddle/operators/softmax_with_cost_op.h +++ b/paddle/operators/softmax_with_cross_entropy_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" @@ -25,13 +25,13 @@ template ; template -class SoftmaxWithLossKernel : public framework::OpKernel { +class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override {} }; template -class SoftmaxWithLossGradKernel : public framework::OpKernel { +class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override {} }; diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 7d363b3108..a059cd0b81 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -37,7 +37,7 @@ USE_OP(mul); USE_OP(mean); USE_OP(sigmoid); USE_OP(softmax); -USE_OP(softmax_with_loss); +USE_OP(softmax_with_cross_entropy); USE_OP(rowwise_add); USE_OP(fill_zeros_like); USE_NO_KERNEL_OP(recurrent); From c0cef849b6971657b3d3396578e18824ec926e15 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 12 Sep 2017 00:17:11 +0800 Subject: [PATCH 03/51] softmax as function. 
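
This commit lifts the Eigen-based softmax computation out of SoftmaxKernel
into a shared math::softmax helper so that softmax_op and, in the following
commits, the softmax_with_cross_entropy kernels can reuse one implementation.
The helper keeps the usual max-shift trick: subtracting each row's maximum
before exponentiating does not change the softmax value but keeps exp() in a
safe range. A minimal NumPy sketch of the same computation (stable_softmax
here mirrors the helper used by the Python unit tests and is illustrative,
not part of this patch):

    import numpy as np

    def stable_softmax(x):
        """Row-wise softmax computed with the max-shift trick."""
        shifted = x - x.max(axis=-1, keepdims=True)  # largest entry becomes 0
        exps = np.exp(shifted)
        return exps / exps.sum(axis=-1, keepdims=True)

    logits = np.array([[1.0, 2.0, 3.0], [1000.0, 1001.0, 1002.0]])
    print(stable_softmax(logits))  # both rows stay finite and sum to 1

Without the shift, exp(1000.0) overflows to inf in float64 and the second
row would evaluate to NaN.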
--- paddle/operators/CMakeLists.txt | 4 +- paddle/operators/math/CMakeLists.txt | 8 +-- paddle/operators/math/softmax_function.cc | 63 +++++++++++++++++++++++ paddle/operators/math/softmax_function.h | 29 +++++++++++ paddle/operators/softmax_op.h | 32 ++---------- 5 files changed, 103 insertions(+), 33 deletions(-) create mode 100644 paddle/operators/math/softmax_function.cc create mode 100644 paddle/operators/math/softmax_function.h diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index f9ea25ab04..94e00ac382 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -55,10 +55,12 @@ set(DEPS_OPS minus_op mul_op recurrent_op - scale_op) + scale_op + softmax_op) op_library(identity_op DEPS scale_op) op_library(minus_op DEPS scale_op) op_library(mul_op DEPS math_function) +op_library(softmax_op DEPS math_function) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor operator net_op) op_library(scale_op DEPS net_op) diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index f8333f34f7..8ce39db621 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -1,9 +1,9 @@ - if(WITH_GPU) - nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc - im2col.cu DEPS cblas device_context) + nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc + im2col.cu softmax_function.cc DEPS cblas device_context operator) else() - cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context) + cc_library(math_function SRCS math_function.cc im2col.cc + softmax_function.cc DEPS cblas device_context operator) endif() nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) diff --git a/paddle/operators/math/softmax_function.cc b/paddle/operators/math/softmax_function.cc new file mode 100644 index 0000000000..7edb632d31 --- /dev/null +++ b/paddle/operators/math/softmax_function.cc @@ -0,0 +1,63 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#ifndef PADDLE_ONLY_CPU +#define EIGEN_USE_GPU +#endif + +#include "paddle/operators/math/softmax_function.h" + +namespace paddle { +namespace operators { +namespace math { + +template +using EigenMatrix = framework::EigenMatrix; + +template +void softmax(const framework::Tensor* X, framework::Tensor* Y, + const framework::ExecutionContext& context) { + auto logits = EigenMatrix::From(*X); + auto softmax = EigenMatrix::From(*Y); + + const int kBatchDim = 0; + const int kClassDim = 1; + + const int batch_size = logits.dimension(kBatchDim); + const int num_classes = logits.dimension(kClassDim); + + Eigen::DSizes along_class(kClassDim); + Eigen::DSizes batch_by_one(batch_size, 1); + Eigen::DSizes one_by_class(1, num_classes); + + auto shifted_logits = (logits - + logits.maximum(along_class) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)); + + softmax.device(context.GetEigenDevice()) = shifted_logits.exp(); + softmax.device(context.GetEigenDevice()) = + (softmax * + softmax.sum(along_class) + .inverse() + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)); +} + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/softmax_function.h b/paddle/operators/math/softmax_function.h new file mode 100644 index 0000000000..2e1b2a7ad0 --- /dev/null +++ b/paddle/operators/math/softmax_function.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/operator.h" +#include "paddle/framework/tensor.h" + +namespace paddle { +namespace operators { +namespace math { + +template +void softmax(const framework::Tensor* X, framework::Tensor* Y, + const framework::ExecutionContext& context); +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index 8a3a5ab927..ff054a59ae 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/math/softmax_function.h" namespace paddle { namespace operators { @@ -30,36 +31,11 @@ class SoftmaxKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto X = context.Input("X"); auto Y = context.Output("Y"); - Y->mutable_data(context.GetPlace()); - - auto logits = EigenMatrix::From(*X); - auto softmax = EigenMatrix::From(*Y); - - const int kBatchDim = 0; - const int kClassDim = 1; - const int batch_size = logits.dimension(kBatchDim); - const int num_classes = logits.dimension(kClassDim); + // allocate memory on device. 
+ Y->mutable_data(context.GetPlace()); - Eigen::DSizes along_class(kClassDim); - Eigen::DSizes batch_by_one(batch_size, 1); - Eigen::DSizes one_by_class(1, num_classes); - - auto shifted_logits = (logits - - logits.maximum(along_class) - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)); - - softmax.device(context.GetEigenDevice()) = shifted_logits.exp(); - - softmax.device(context.GetEigenDevice()) = - (softmax * - softmax.sum(along_class) - .inverse() - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)); + math::softmax(X, Y, context); } }; From c6366c819e668c21a822122086ad72008357dd66 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 12 Sep 2017 16:38:11 +0800 Subject: [PATCH 04/51] softmax as functor. --- paddle/operators/CMakeLists.txt | 2 +- paddle/operators/cross_entropy_op.h | 28 ++------- paddle/operators/math/CMakeLists.txt | 7 ++- paddle/operators/math/softmax_function.cc | 58 ++++--------------- paddle/operators/math/softmax_function.cu | 27 +++++++++ paddle/operators/math/softmax_function.h | 57 ++++++++++++++---- paddle/operators/softmax_op.h | 2 +- .../softmax_with_cross_entropy_op.cc | 44 +++++++------- .../operators/softmax_with_cross_entropy_op.h | 27 ++++++++- .../framework/tests/test_cross_entropy_op.py | 13 +++-- .../tests/test_softmax_with_cost_op.py | 22 ------- .../test_softmax_with_cross_entropy_op.py | 39 +++++++++++++ 12 files changed, 192 insertions(+), 134 deletions(-) create mode 100644 paddle/operators/math/softmax_function.cu delete mode 100644 python/paddle/v2/framework/tests/test_softmax_with_cost_op.py create mode 100644 python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 94e00ac382..8863ffe8e3 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -60,7 +60,7 @@ set(DEPS_OPS op_library(identity_op DEPS scale_op) op_library(minus_op DEPS scale_op) op_library(mul_op DEPS math_function) -op_library(softmax_op DEPS math_function) +op_library(softmax_op DEPS softmax_function) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor operator net_op) op_library(scale_op DEPS net_op) diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h index eb4d1348de..6de23bbe00 100644 --- a/paddle/operators/cross_entropy_op.h +++ b/paddle/operators/cross_entropy_op.h @@ -14,31 +14,13 @@ limitations under the License. 
*/ #pragma once #include "paddle/framework/op_registry.h" +#include "paddle/operators/math/utils.h" namespace paddle { namespace operators { using Tensor = framework::Tensor; -template -inline T tolerable_value(const T x) { - static_assert(std::is_floating_point::value, - "tolerable_value works only on float, " - "double and double double."); - - const T kApproInf = 1e20; - - if (x == INFINITY) { - return kApproInf; - } - - if (x == -INFINITY) { - return -kApproInf; - } - - return x; -} - template class OnehotCrossEntropyOpKernel : public framework::OpKernel { public: @@ -55,12 +37,12 @@ class OnehotCrossEntropyOpKernel : public framework::OpKernel { T* Ydata = Y->data(); - int batch_size = X->dims()[0]; - int class_num = X->dims()[1]; + const int batch_size = X->dims()[0]; + const int class_num = X->dims()[1]; for (int i = 0; i < batch_size; ++i) { int index = i * class_num + label_data[i]; - Ydata[i] = -tolerable_value(std::log(Xdata[index])); + Ydata[i] = -math::tolerable_value(std::log(Xdata[index])); } } }; @@ -89,7 +71,7 @@ class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel { memset(dXdata, 0, sizeof(T) * batch_size * class_num); for (int i = 0; i < batch_size; ++i) { int index = i * class_num + label_data[i]; - dXdata[index] = -tolerable_value(dYdata[i] / Xdata[index]); + dXdata[index] = -math::tolerable_value(dYdata[i] / Xdata[index]); } } }; diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index 8ce39db621..832a954e3a 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -1,9 +1,12 @@ if(WITH_GPU) nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc - im2col.cu softmax_function.cc DEPS cblas device_context operator) + im2col.cu DEPS cblas device_context operator) + nv_library(softmax_function SRCS softmax_function.cc softmax_function.cu + DEPS operator) else() cc_library(math_function SRCS math_function.cc im2col.cc - softmax_function.cc DEPS cblas device_context operator) + DEPS cblas device_context operator) + cc_library(softmax_function SRCS softmax_function.cc DEPS operator) endif() nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) diff --git a/paddle/operators/math/softmax_function.cc b/paddle/operators/math/softmax_function.cc index 7edb632d31..cd46ed96ca 100644 --- a/paddle/operators/math/softmax_function.cc +++ b/paddle/operators/math/softmax_function.cc @@ -1,20 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef PADDLE_ONLY_CPU -#define EIGEN_USE_GPU -#endif + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ #include "paddle/operators/math/softmax_function.h" @@ -22,41 +18,7 @@ namespace paddle { namespace operators { namespace math { -template -using EigenMatrix = framework::EigenMatrix; - -template -void softmax(const framework::Tensor* X, framework::Tensor* Y, - const framework::ExecutionContext& context) { - auto logits = EigenMatrix::From(*X); - auto softmax = EigenMatrix::From(*Y); - - const int kBatchDim = 0; - const int kClassDim = 1; - - const int batch_size = logits.dimension(kBatchDim); - const int num_classes = logits.dimension(kClassDim); - - Eigen::DSizes along_class(kClassDim); - Eigen::DSizes batch_by_one(batch_size, 1); - Eigen::DSizes one_by_class(1, num_classes); - - auto shifted_logits = (logits - - logits.maximum(along_class) - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)); - - softmax.device(context.GetEigenDevice()) = shifted_logits.exp(); - softmax.device(context.GetEigenDevice()) = - (softmax * - softmax.sum(along_class) - .inverse() - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)); -} +template class SoftmaxFunctor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/softmax_function.cu b/paddle/operators/math/softmax_function.cu new file mode 100644 index 0000000000..486697a161 --- /dev/null +++ b/paddle/operators/math/softmax_function.cu @@ -0,0 +1,27 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU + +#include "paddle/operators/math/softmax_function.h" + +namespace paddle { +namespace operators { +namespace math { + +template class SoftmaxFunctor; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/softmax_function.h b/paddle/operators/math/softmax_function.h index 2e1b2a7ad0..ce29a69bce 100644 --- a/paddle/operators/math/softmax_function.h +++ b/paddle/operators/math/softmax_function.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" @@ -21,9 +21,44 @@ namespace paddle { namespace operators { namespace math { +template +using EigenMatrix = framework::EigenMatrix; + template -void softmax(const framework::Tensor* X, framework::Tensor* Y, - const framework::ExecutionContext& context); +class SoftmaxFunctor { + public: + void operator()(const framework::Tensor* X, framework::Tensor* Y, + const framework::ExecutionContext& context) { + auto logits = EigenMatrix::From(*X); + auto softmax = EigenMatrix::From(*Y); + + const int kBatchDim = 0; + const int kClassDim = 1; + + const int batch_size = logits.dimension(kBatchDim); + const int num_classes = logits.dimension(kClassDim); + + Eigen::DSizes along_class(kClassDim); + Eigen::DSizes batch_by_one(batch_size, 1); + Eigen::DSizes one_by_class(1, num_classes); + + auto shifted_logits = (logits - + logits.maximum(along_class) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)); + + softmax.device(context.GetEigenDevice()) = shifted_logits.exp(); + softmax.device(context.GetEigenDevice()) = + (softmax * + softmax.sum(along_class) + .inverse() + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)); + } +}; } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index ff054a59ae..6d14542a72 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -35,7 +35,7 @@ class SoftmaxKernel : public framework::OpKernel { // allocate memory on device. Y->mutable_data(context.GetPlace()); - math::softmax(X, Y, context); + math::SoftmaxFunctor()(X, Y, context); } }; diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index 2edf00766e..b4aa9aab4b 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -23,13 +23,13 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto logits = ctx.Input("logits"); + auto logits = ctx.Input("Logits"); PADDLE_ENFORCE( logits->dims().size() == 2UL, "The input of softmax_with_cross_entropy should be a 2-d tensor."); - PADDLE_ENFORCE(ctx.Input("lables")->dims().size() == 1UL, + PADDLE_ENFORCE(ctx.Input("Label")->dims().size() == 1UL, "The label should be a 1-d tensor."); - ctx.Output("Y")->Resize({logits->dims()[0]}); + ctx.Output("Label")->Resize({logits->dims()[0]}); } }; @@ -39,11 +39,15 @@ class SoftmaxWithCrossEntropyOpMaker SoftmaxWithCrossEntropyOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("logits", + AddInput("Logits", "The unscaled log probabilities which is a 2-D tensor with" "shape [N x K]. N is the batch_size, and K is the class number."); - AddInput("label", "The ground truth. A 1-D tensor with shape N."); - AddOutput("Y", "A 1-D tensor with shape N."); + AddInput("Label", "The ground truth. 
A 1-D tensor with shape N."); + AddOutput("Softmax", + "Store the outputs of softmax function, " + "which will be used in backward calculation.") + .AsIntermediate(); + AddOutput("Loss", "A 1-D tensor with shape N."); AddComment(R"DOC( Cross entropy loss with softmax are used as the output layer extensively. This operator computes the softmax normalized values for each row of the input @@ -67,21 +71,21 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should be not null."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")), - "Input(Y@GRAD) should be not null."); - PADDLE_ENFORCE_EQ(ctx.Input("Y")->dims(), - ctx.Input(framework::GradVarName("Y"))->dims(), - "Input(Y) and its gradients should have a same shape."); - - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("labels"), - "Input(lables) should be not null."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("logits")), - "Input(logits@GRAD) should be not null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Loss"), + "Input(Loss) should be not null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Loss")), + "Input(Loss@GRAD) should be not null."); PADDLE_ENFORCE_EQ( - ctx.Input("logits")->dims(), - ctx.Input(framework::GradVarName("logits"))->dims(), - "Input(logits) and its gradients should have a same shape."); + ctx.Input("Logits")->dims(), + ctx.Input(framework::GradVarName("Logits"))->dims(), + "Input(Logits) and its gradients should have a same shape."); + PADDLE_ENFORCE_EQ( + ctx.Input("Logits")->dims(), + ctx.Input(framework::GradVarName("Logits"))->dims(), + "Input(Logits) and its gradients should have a same shape."); + + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"), + "Input(Lable) should be not null."); } }; diff --git a/paddle/operators/softmax_with_cross_entropy_op.h b/paddle/operators/softmax_with_cross_entropy_op.h index 418fb540b8..4c019a7599 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.h +++ b/paddle/operators/softmax_with_cross_entropy_op.h @@ -15,6 +15,8 @@ #pragma once #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/math/softmax_function.h" +#include "paddle/operators/math/utils.h" namespace paddle { namespace operators { @@ -27,7 +29,30 @@ using EigenMatrix = framework::EigenMatrix; template class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override {} + void Compute(const framework::ExecutionContext& context) const override { + // Calculate ths softmax outputs. + const Tensor* logits = context.Input("Logits"); + Tensor* softmax = context.Output("Softmax"); + // allocate memory on device. + softmax->mutable_data(context.GetPlace()); + math::SoftmaxFunctor()(logits, softmax, context); + + // Calculate the cross entropy loss based on hard labels. 
+ T* softmax_out = softmax->data(); + const int* label_data = context.Input("label")->data(); + + Tensor* loss = context.Output("Loss"); + loss->mutable_data(context.GetPlace()); + T* loss_data = loss->data(); + + const int batch_size = logits->dims()[0]; + const int class_num = logits->dims()[1]; + + for (int i = 0; i < batch_size; ++i) { + int index = i * class_num + label_data[i]; + loss_data[i] = -math::tolerable_value(std::log(softmax_out[index])); + } + } }; template diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py index c2fc102a8b..6c1dc4044f 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py @@ -1,6 +1,7 @@ import unittest import numpy from op_test import OpTest +import pdb class TestCrossEntropy(OpTest): @@ -10,18 +11,20 @@ class TestCrossEntropy(OpTest): class_num = 10 X = numpy.random.uniform(0.1, 1.0, [batch_size, class_num]).astype("float32") - label = (class_num / 2) * numpy.ones(batch_size).astype("int32") - self.inputs = {'X': X, 'label': label} + + labels = numpy.random.randint(0, class_num, batch_size, dtype="int32") + + self.inputs = {"X": X, "label": labels} Y = [] for i in range(0, batch_size): - Y.append(-numpy.log(X[i][label[i]])) - self.outputs = {'Y': numpy.array(Y).astype("float32")} + Y.append(-numpy.log(X[i][labels[i]])) + self.outputs = {"Y": numpy.array(Y).astype("float32")} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y') + self.check_grad(["X"], "Y") if __name__ == "__main__": diff --git a/python/paddle/v2/framework/tests/test_softmax_with_cost_op.py b/python/paddle/v2/framework/tests/test_softmax_with_cost_op.py deleted file mode 100644 index f7b9f54a91..0000000000 --- a/python/paddle/v2/framework/tests/test_softmax_with_cost_op.py +++ /dev/null @@ -1,22 +0,0 @@ -import unittest - -import numpy as np - -from gradient_checker import GradientChecker, create_op -from op_test_util import OpTestMeta - - -class TestSoftmaxWithLossOp(unittest.TestCase): - __metaclass__ = OpTestMeta - - def setUp(self): - pass - - -class SoftmaxWithLossGradOpTest(GradientChecker): - def test_softmax(self): - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py new file mode 100644 index 0000000000..6116110569 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py @@ -0,0 +1,39 @@ +import unittest +import numpy as np +import pdb + +from op_test import OpTest +from test_softmax_op import stable_softmax + + +class TestSoftmaxWithCrossEntropyOp(OpTest): + def setUp(self): + self.op_type = "softmax_with_cross_entropy" + + MAX_BATCH_SIZE = 23 + MAX_CLASS_NUM = 255 + + batch_size = np.random.randint(1, MAX_BATCH_SIZE, 1)[0] + class_num = np.random.randint(2, MAX_CLASS_NUM, 1)[0] + + logits = np.random.uniform(0.1, 1.0, + [batch_size, class_num]).astype("float32") + softmax = np.apply_along_axis(stable_softmax, 1, logits) + labels = np.random.randint(0, class_num, batch_size, dtype="int32") + + cross_entropy = [ + -np.log(softmax[i][labels[i]]) for i in range(softmax.shape[0]) + ] + + self.inputs = {"Logits": logits, "Label": labels} + self.outputs = {"Loss": cross_entropy} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + pass + + +if 
__name__ == "__main__": + unittest.main() From efa4526c52fbd80f0f0c9f135f1aabf438cbcf69 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 13 Sep 2017 18:01:12 +0800 Subject: [PATCH 05/51] finish implementation and fix unittest. --- paddle/operators/softmax_op.h | 2 - .../softmax_with_cross_entropy_op.cc | 71 +++++++++---------- .../softmax_with_cross_entropy_op.cu | 7 +- .../operators/softmax_with_cross_entropy_op.h | 30 ++++++-- paddle/pybind/pybind.cc | 2 +- python/paddle/v2/framework/tests/op_test.py | 22 +++--- .../test_softmax_with_cross_entropy_op.py | 12 ++-- 7 files changed, 77 insertions(+), 69 deletions(-) diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index 6d14542a72..68d05fc215 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -43,8 +43,6 @@ template class SoftmaxGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - std::shared_ptr scale_ = std::make_shared(); - auto Y = context.Input("Y"); auto dY = context.Input(framework::GradVarName("Y")); auto dX = context.Output(framework::GradVarName("X")); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index b4aa9aab4b..fd75494ff8 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -17,31 +17,16 @@ namespace paddle { namespace operators { -class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(const framework::InferShapeContext &ctx) const override { - auto logits = ctx.Input("Logits"); - PADDLE_ENFORCE( - logits->dims().size() == 2UL, - "The input of softmax_with_cross_entropy should be a 2-d tensor."); - PADDLE_ENFORCE(ctx.Input("Label")->dims().size() == 1UL, - "The label should be a 1-d tensor."); - ctx.Output("Label")->Resize({logits->dims()[0]}); - } -}; - class SoftmaxWithCrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { public: - SoftmaxWithCrossEntropyOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) + SoftmaxWithCrossEntropyOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("Logits", "The unscaled log probabilities which is a 2-D tensor with" - "shape [N x K]. N is the batch_size, and K is the class number."); + "shape [N x K]. N is the batch_size, and K is the class number.") + .NotInGradient(); AddInput("Label", "The ground truth. 
A 1-D tensor with shape N."); AddOutput("Softmax", "Store the outputs of softmax function, " @@ -70,22 +55,34 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Loss"), - "Input(Loss) should be not null."); + void InferShape(const framework::InferShapeContext& ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Loss")), - "Input(Loss@GRAD) should be not null."); - PADDLE_ENFORCE_EQ( - ctx.Input("Logits")->dims(), - ctx.Input(framework::GradVarName("Logits"))->dims(), - "Input(Logits) and its gradients should have a same shape."); - PADDLE_ENFORCE_EQ( - ctx.Input("Logits")->dims(), - ctx.Input(framework::GradVarName("Logits"))->dims(), - "Input(Logits) and its gradients should have a same shape."); - + "Input(Loss@Grad) should not be null"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Softmax"), + "Input(Softmax) should be not null."); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"), "Input(Lable) should be not null."); + + ctx.Output(framework::GradVarName("Logits")) + ->Resize(ctx.Input("Softmax")->dims()); + } +}; + +class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + const Tensor* logits = ctx.Input("Logits"); + PADDLE_ENFORCE( + logits->dims().size() == 2UL, + "The input of softmax_with_cross_entropy should be a 2-d tensor."); + PADDLE_ENFORCE(ctx.Input("Label")->dims().size() == 1UL, + "The label should be a 1-d tensor."); + + ctx.Output("Softmax")->Resize(logits->dims()); + ctx.Output("Loss")->Resize({logits->dims()[0], 1}); } }; @@ -98,9 +95,7 @@ REGISTER_OP(softmax_with_cross_entropy, ops::SoftmaxWithCrossEntropyOp, ops::SoftmaxWithCrossEntropyOpMaker, softmax_with_cross_entropy_grad, ops::SoftmaxWithCrossEntropyOpGrad); -REGISTER_OP_CPU_KERNEL( - softmax_with_cross_entropy, - ops::SoftmaxWithCrossEntropyKernel); -REGISTER_OP_CPU_KERNEL( - softmax_with_cross_entropy_grad, - ops::SoftmaxWithCrossEntropyGradKernel); +REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy, + ops::SoftmaxWithCrossEntropyKernel); +REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy_grad, + ops::SoftmaxWithCrossEntropyGradKernel); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cu b/paddle/operators/softmax_with_cross_entropy_op.cu index c9d47cc4aa..922bb19d4d 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/operators/softmax_with_cross_entropy_op.cu @@ -17,9 +17,4 @@ namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL( - softmax_with_cross_entropy, - ops::SoftmaxWithCrossEntropyKernel); -REGISTER_OP_GPU_KERNEL( - softmax_with_cross_entropy_grad, - ops::SoftmaxWithCrossEntropyGradKernel); +// TODO(caoying) add GPU kernel diff --git a/paddle/operators/softmax_with_cross_entropy_op.h b/paddle/operators/softmax_with_cross_entropy_op.h index 4c019a7599..e147cdb815 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.h +++ b/paddle/operators/softmax_with_cross_entropy_op.h @@ -26,20 +26,24 @@ template using EigenMatrix = framework::EigenMatrix; -template +template class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { + auto place = context.GetPlace(); + 
PADDLE_ENFORCE(platform::is_cpu_place(place), + "This kernel only runs on CPU."); + // Calculate ths softmax outputs. const Tensor* logits = context.Input("Logits"); Tensor* softmax = context.Output("Softmax"); - // allocate memory on device. softmax->mutable_data(context.GetPlace()); - math::SoftmaxFunctor()(logits, softmax, context); + + math::SoftmaxFunctor()(logits, softmax, context); // Calculate the cross entropy loss based on hard labels. T* softmax_out = softmax->data(); - const int* label_data = context.Input("label")->data(); + const int* label_data = context.Input("Label")->data(); Tensor* loss = context.Output("Loss"); loss->mutable_data(context.GetPlace()); @@ -55,10 +59,24 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { } }; -template +template class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override {} + void Compute(const framework::ExecutionContext& context) const override { + Tensor* logit_grad = + context.Output(framework::GradVarName("Logits")); + logit_grad->ShareDataWith(*context.Input("Softmax")); + T* logit_grad_data = logit_grad->data(); + + const int batch_size = logit_grad->dims()[0]; + const int class_num = logit_grad->dims()[1]; + + const int* label_data = context.Input("Label")->data(); + for (int i = 0; i < batch_size; ++i) { + int index = i * class_num + label_data[i]; + logit_grad_data[index] -= .1; + } + } }; } // namespace operators diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 93792c568e..cb361596ae 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -39,7 +39,6 @@ USE_OP(elementwise_mul); USE_OP(mean); USE_OP(sigmoid); USE_OP(softmax); -USE_OP(softmax_with_cross_entropy); USE_OP(rowwise_add); USE_OP(fill_zeros_like); USE_NO_KERNEL_OP(recurrent); @@ -53,6 +52,7 @@ USE_OP(cos_sim); USE_CPU_ONLY_OP(gather); USE_CPU_ONLY_OP(scatter); USE_CPU_ONLY_OP(concat); +USE_CPU_ONLY_OP(softmax_with_cross_entropy); USE_OP(top_k); USE_OP(squared_l2_distance); USE_OP(sum); diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py index 4fec4c9109..f5f11aa93d 100644 --- a/python/paddle/v2/framework/tests/op_test.py +++ b/python/paddle/v2/framework/tests/op_test.py @@ -166,7 +166,7 @@ def get_gradient(scope, op, inputs, outputs, grad_name, place, class OpTest(unittest.TestCase): - def check_output_with_place(self, place): + def check_output_with_place(self, place, atol): self.scope = core.Scope() op_inputs = self.inputs if hasattr(self, "inputs") else dict() op_attrs = self.attrs if hasattr(self, "attrs") else dict() @@ -188,22 +188,23 @@ class OpTest(unittest.TestCase): expect = sub_out[sub_out_name] self.assertTrue( np.allclose( - actual, expect, atol=1e-05), - "output name: " + out_name + "has diff") + actual, expect, atol=atol), + "output name: " + out_name + " has diff.") else: actual = np.array(self.scope.find_var(out_name).get_tensor()) expect = self.outputs[out_name] + self.assertTrue( np.allclose( - actual, expect, atol=1e-05), - "output name: " + out_name + "has diff") + actual, expect, atol=atol), + "output name: " + out_name + " has diff.") - def check_output(self): + def check_output(self, atol=1e-5): places = [core.CPUPlace()] if core.is_compile_gpu(): places.append(core.GPUPlace(0)) for place in places: - self.check_output_with_place(place) + self.check_output_with_place(place, atol) def __assert_is_close(self, numeric_grads, analytic_grads, names, 
max_relative_error, msg_prefix): @@ -217,9 +218,10 @@ class OpTest(unittest.TestCase): def err_msg(): offset = np.argmax(diff_mat > max_relative_error) - return "%s Variable %s max gradient diff %f over limit %f, the first " \ - "error element is %d" % ( - msg_prefix, name, max_diff, max_relative_error, offset) + return ("%s Variable %s max gradient diff %f over limit %f, " + "the first error element is %d") % ( + msg_prefix, name, max_diff, max_relative_error, + offset) self.assertLessEqual(max_diff, max_relative_error, err_msg()) diff --git a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py index 6116110569..4e35c063b9 100644 --- a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py @@ -11,7 +11,7 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): self.op_type = "softmax_with_cross_entropy" MAX_BATCH_SIZE = 23 - MAX_CLASS_NUM = 255 + MAX_CLASS_NUM = 10 batch_size = np.random.randint(1, MAX_BATCH_SIZE, 1)[0] class_num = np.random.randint(2, MAX_CLASS_NUM, 1)[0] @@ -21,18 +21,18 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): softmax = np.apply_along_axis(stable_softmax, 1, logits) labels = np.random.randint(0, class_num, batch_size, dtype="int32") - cross_entropy = [ - -np.log(softmax[i][labels[i]]) for i in range(softmax.shape[0]) - ] + cross_entropy = np.asmatrix( + [[-np.log(softmax[i][labels[i]])] for i in range(softmax.shape[0])], + dtype="float32") self.inputs = {"Logits": logits, "Label": labels} - self.outputs = {"Loss": cross_entropy} + self.outputs = {"Softmax": softmax, "Loss": cross_entropy} def test_check_output(self): self.check_output() def test_check_grad(self): - pass + self.check_grad(["Logits"], "Loss") if __name__ == "__main__": From 8f8ea005fecd911e913ff728ed37ecb990dfbbca Mon Sep 17 00:00:00 2001 From: caoying03 Date: Fri, 15 Sep 2017 14:51:04 +0800 Subject: [PATCH 06/51] fix implementations. --- paddle/operators/math/utils.h | 42 ++++++++ paddle/operators/onehot_cross_entropy_op.cu | 20 +--- .../softmax_with_cross_entropy_op.cc | 12 +-- .../softmax_with_cross_entropy_op.cu | 97 ++++++++++++++++++- .../operators/softmax_with_cross_entropy_op.h | 7 +- .../framework/tests/test_cross_entropy_op.py | 1 - .../test_softmax_with_cross_entropy_op.py | 7 +- 7 files changed, 151 insertions(+), 35 deletions(-) create mode 100644 paddle/operators/math/utils.h diff --git a/paddle/operators/math/utils.h b/paddle/operators/math/utils.h new file mode 100644 index 0000000000..1e72c8e0ca --- /dev/null +++ b/paddle/operators/math/utils.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#pragma once +#include "paddle/platform/assert.h" +#include "paddle/platform/hostdevice.h" + +namespace paddle { +namespace operators { +namespace math { + +template +T HOSTDEVICE tolerable_value(const T x) { + PADDLE_ASSERT(std::is_floating_point::value); + + const T kApproInf = 1e20; + + if (x == INFINITY) { + return kApproInf; + } + + if (x == -INFINITY) { + return -kApproInf; + } + + return x; +} + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/onehot_cross_entropy_op.cu b/paddle/operators/onehot_cross_entropy_op.cu index d999bfce58..f8ed9680e7 100644 --- a/paddle/operators/onehot_cross_entropy_op.cu +++ b/paddle/operators/onehot_cross_entropy_op.cu @@ -13,6 +13,7 @@ limitations under the License. */ #include "paddle/framework/op_registry.h" +#include "paddle/operators/math/utils.h" #include "paddle/platform/assert.h" namespace paddle { @@ -20,20 +21,6 @@ namespace operators { using Tensor = framework::Tensor; -template -__host__ __device__ T clipping_log(const T x) { - PADDLE_ASSERT(std::is_floating_point::value); - const T kApproInf = 1e20; - T v = log(x); - if (v == INFINITY) { - return kApproInf; - } - if (v == -INFINITY) { - return -kApproInf; - } - return v; -} - template __global__ void CrossEntropyKernel(T* Y, const T* X, const int* label, const int N, const int D) { @@ -42,7 +29,7 @@ __global__ void CrossEntropyKernel(T* Y, const T* X, const int* label, for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) { PADDLE_ASSERT(label[i] >= 0 && label[i] < D); - Y[i] = -clipping_log(X[i * D + label[i]]); + Y[i] = -math::tolerable_value(log(X[i * D + label[i]])); } } @@ -73,7 +60,7 @@ class OnehotCrossEntropyOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "This kernel only runs on GPU device."); auto X = ctx.Input("X"); const T* Xdata = X->data(); @@ -86,6 +73,7 @@ class OnehotCrossEntropyOpCUDAKernel : public framework::OpKernel { int D = X->dims()[1]; int block = 512; int grid = (N + block - 1) / block; + // TODO(qingqing) launch kernel on specified stream // base on ExecutionContext. CrossEntropyKernel<<>>(Ydata, Xdata, label_data, N, D); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index fd75494ff8..a0941bb624 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -32,7 +32,7 @@ class SoftmaxWithCrossEntropyOpMaker "Store the outputs of softmax function, " "which will be used in backward calculation.") .AsIntermediate(); - AddOutput("Loss", "A 1-D tensor with shape N."); + AddOutput("Out", "A 1-D tensor with shape N."); AddComment(R"DOC( Cross entropy loss with softmax are used as the output layer extensively. 
This operator computes the softmax normalized values for each row of the input @@ -56,14 +56,14 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Loss")), - "Input(Loss@Grad) should not be null"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), + "Input(Out@Grad) should not be null"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Softmax"), "Input(Softmax) should be not null."); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"), "Input(Lable) should be not null."); - ctx.Output(framework::GradVarName("Logits")) + ctx.Output(framework::GradVarName("Logits")) ->Resize(ctx.Input("Softmax")->dims()); } }; @@ -81,8 +81,8 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx.Input("Label")->dims().size() == 1UL, "The label should be a 1-d tensor."); - ctx.Output("Softmax")->Resize(logits->dims()); - ctx.Output("Loss")->Resize({logits->dims()[0], 1}); + ctx.Output("Softmax")->Resize(logits->dims()); + ctx.Output("Out")->Resize({logits->dims()[0], 1}); } }; diff --git a/paddle/operators/softmax_with_cross_entropy_op.cu b/paddle/operators/softmax_with_cross_entropy_op.cu index 922bb19d4d..5af6a521a8 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/operators/softmax_with_cross_entropy_op.cu @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,8 +13,97 @@ limitations under the License. */ #define EIGEN_USE_GPU -#include "softmax_with_cross_entropy_op.h" +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/softmax_function.h" +#include "paddle/operators/math/utils.h" -namespace ops = paddle::operators; +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +__global__ void CrossEntropyKernel(T* out, const T* softmax_out, + const int* label, const int batch_size, + const int class_num) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= batch_size) return; + PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num); + out[i] = -math::tolerable_value(log(softmax_out[i * class_num + label[i]])); +} + +template +__global__ void CrossEntropyWithSoftmaxGradKernel(T* softmax_out, + const int* label, + const int batch_size, + const int class_num) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= batch_size) return; + + PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num); + softmax_out[i * class_num + label[i]] -= 1.; +} + +template +class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()), + "This kernel only runs on GPU device."); + + // Calculate ths softmax outputs. + const Tensor* logits = context.Input("Logits"); + Tensor* softmax = context.Output("Softmax"); + softmax->mutable_data(context.GetPlace()); + math::SoftmaxFunctor()(logits, softmax, context); + T* softmax_out = softmax->data(); + + // Calculate the cross entropy loss based on hard labels. 
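+    // Each thread of CrossEntropyKernel handles one sample: it reads that
+    // sample's label and writes loss[i] = -log(softmax[i * class_num + label[i]]),
+    // with math::tolerable_value clipping the log so that a zero probability
+    // does not produce an infinite loss. The launch below uses 512 threads per
+    // block and enough blocks to cover the whole batch.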
+ const int* label_data = context.Input("Label")->data(); + Tensor* loss = context.Output("Out"); + loss->mutable_data(context.GetPlace()); + T* loss_data = loss->data(); + + const int batch_size = logits->dims()[0]; + const int class_num = logits->dims()[1]; + int block = 512; + int grid = (batch_size + block - 1) / block; -// TODO(caoying) add GPU kernel + CrossEntropyKernel<<>>(loss_data, softmax_out, label_data, + batch_size, class_num); + } +}; + +template +class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()), + "This kernel only runs on GPU device."); + + Tensor* logit_grad = + context.Output(framework::GradVarName("Logits")); + logit_grad->ShareDataWith(*context.Input("Softmax")); + T* logit_grad_data = logit_grad->data(); + + const int batch_size = logit_grad->dims()[0]; + const int class_num = logit_grad->dims()[1]; + + const int* label_data = context.Input("Label")->data(); + + const int block = 512; + const int grid = (batch_size + block - 1) / block; + + CrossEntropyWithSoftmaxGradKernel<<>>( + logit_grad_data, label_data, batch_size, class_num); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy, + ops::SoftmaxWithCrossEntropyCUDAKernel); +REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy_grad, + ops::SoftmaxWithCrossEntropyGradCUDAKernel); diff --git a/paddle/operators/softmax_with_cross_entropy_op.h b/paddle/operators/softmax_with_cross_entropy_op.h index e147cdb815..38b92a0bcd 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.h +++ b/paddle/operators/softmax_with_cross_entropy_op.h @@ -30,8 +30,7 @@ template class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto place = context.GetPlace(); - PADDLE_ENFORCE(platform::is_cpu_place(place), + PADDLE_ENFORCE(platform::is_cpu_place(context.GetPlace()), "This kernel only runs on CPU."); // Calculate ths softmax outputs. 
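    // (The CPU kernel mirrors the CUDA path: the row-wise softmax is written
    // into the "Softmax" output first, and the loss is then read off as the
    // negative log probability of each sample's ground-truth class.)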
@@ -45,7 +44,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { T* softmax_out = softmax->data(); const int* label_data = context.Input("Label")->data(); - Tensor* loss = context.Output("Loss"); + Tensor* loss = context.Output("Out"); loss->mutable_data(context.GetPlace()); T* loss_data = loss->data(); @@ -74,7 +73,7 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel { const int* label_data = context.Input("Label")->data(); for (int i = 0; i < batch_size; ++i) { int index = i * class_num + label_data[i]; - logit_grad_data[index] -= .1; + logit_grad_data[index] -= 1.; } } }; diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py index 5e06525d61..253e7b8a24 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py @@ -1,7 +1,6 @@ import unittest import numpy from op_test import OpTest -import pdb class TestCrossEntropy(OpTest): diff --git a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py index 4e35c063b9..e965dd0482 100644 --- a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py @@ -1,6 +1,5 @@ import unittest import numpy as np -import pdb from op_test import OpTest from test_softmax_op import stable_softmax @@ -11,7 +10,7 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): self.op_type = "softmax_with_cross_entropy" MAX_BATCH_SIZE = 23 - MAX_CLASS_NUM = 10 + MAX_CLASS_NUM = 17 batch_size = np.random.randint(1, MAX_BATCH_SIZE, 1)[0] class_num = np.random.randint(2, MAX_CLASS_NUM, 1)[0] @@ -26,13 +25,13 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): dtype="float32") self.inputs = {"Logits": logits, "Label": labels} - self.outputs = {"Softmax": softmax, "Loss": cross_entropy} + self.outputs = {"Softmax": softmax, "Out": cross_entropy} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(["Logits"], "Loss") + self.check_grad(["Logits"], "Out", max_relative_error=0.05) if __name__ == "__main__": From f1d5fb3b9a6201f3eaf92b12d84b3e3727a3a575 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Thu, 21 Sep 2017 17:47:52 +0800 Subject: [PATCH 07/51] support soft labels. 
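This change lets softmax_with_cross_entropy accept either hard labels (one
class index per sample) or soft labels (a full probability distribution per
sample), selected through the new soft_label attribute documented below. As a
rough NumPy reference for the two loss definitions added to the operator
comment (the helper names here are illustrative and not part of the patch;
stable_softmax mirrors the helper the Python tests already import):

    import numpy as np

    def stable_softmax(logits):
        # shift by the row-wise max before exponentiating, for numerical stability
        shifted = logits - logits.max(axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / exps.sum(axis=1, keepdims=True)

    def hard_label_loss(logits, labels):
        # labels: int vector of shape [N], one class index per sample
        prob = stable_softmax(logits)
        return -np.log(prob[np.arange(logits.shape[0]), labels])

    def soft_label_loss(logits, label_dist):
        # label_dist: float matrix of shape [N, K], each row summing to 1
        prob = stable_softmax(logits)
        return -(label_dist * np.log(prob)).sum(axis=1)

For hard labels the gradient of the loss with respect to the logits is
softmax(logits) minus the one-hot label, which is why the backward kernels
only subtract 1 at each sample's label position.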
--- paddle/operators/math/CMakeLists.txt | 4 +- .../math/{softmax_function.cc => softmax.cc} | 2 +- .../math/{softmax_function.cu => softmax.cu} | 2 +- .../math/{softmax_function.h => softmax.h} | 0 paddle/operators/math/utils.h | 42 ----------- paddle/operators/softmax_op.h | 2 +- .../softmax_with_cross_entropy_op.cc | 75 +++++++++++++------ .../softmax_with_cross_entropy_op.cu | 22 +++--- .../operators/softmax_with_cross_entropy_op.h | 8 +- .../test_softmax_with_cross_entropy_op.py | 4 +- 10 files changed, 74 insertions(+), 87 deletions(-) rename paddle/operators/math/{softmax_function.cc => softmax.cc} (93%) rename paddle/operators/math/{softmax_function.cu => softmax.cu} (94%) rename paddle/operators/math/{softmax_function.h => softmax.h} (100%) delete mode 100644 paddle/operators/math/utils.h diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index 832a954e3a..074ca47d7f 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -1,12 +1,12 @@ if(WITH_GPU) nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context operator) - nv_library(softmax_function SRCS softmax_function.cc softmax_function.cu + nv_library(softmax_function SRCS softmax.cc softmax.cu DEPS operator) else() cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator) - cc_library(softmax_function SRCS softmax_function.cc DEPS operator) + cc_library(softmax_function SRCS softmax.cc DEPS operator) endif() nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) diff --git a/paddle/operators/math/softmax_function.cc b/paddle/operators/math/softmax.cc similarity index 93% rename from paddle/operators/math/softmax_function.cc rename to paddle/operators/math/softmax.cc index cd46ed96ca..ac9f3c4bf6 100644 --- a/paddle/operators/math/softmax_function.cc +++ b/paddle/operators/math/softmax.cc @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/math/softmax_function.h" +#include "paddle/operators/math/softmax.h" namespace paddle { namespace operators { diff --git a/paddle/operators/math/softmax_function.cu b/paddle/operators/math/softmax.cu similarity index 94% rename from paddle/operators/math/softmax_function.cu rename to paddle/operators/math/softmax.cu index 486697a161..4c3df0550e 100644 --- a/paddle/operators/math/softmax_function.cu +++ b/paddle/operators/math/softmax.cu @@ -14,7 +14,7 @@ #define EIGEN_USE_GPU -#include "paddle/operators/math/softmax_function.h" +#include "paddle/operators/math/softmax.h" namespace paddle { namespace operators { diff --git a/paddle/operators/math/softmax_function.h b/paddle/operators/math/softmax.h similarity index 100% rename from paddle/operators/math/softmax_function.h rename to paddle/operators/math/softmax.h diff --git a/paddle/operators/math/utils.h b/paddle/operators/math/utils.h deleted file mode 100644 index 1e72c8e0ca..0000000000 --- a/paddle/operators/math/utils.h +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once -#include "paddle/platform/assert.h" -#include "paddle/platform/hostdevice.h" - -namespace paddle { -namespace operators { -namespace math { - -template -T HOSTDEVICE tolerable_value(const T x) { - PADDLE_ASSERT(std::is_floating_point::value); - - const T kApproInf = 1e20; - - if (x == INFINITY) { - return kApproInf; - } - - if (x == -INFINITY) { - return -kApproInf; - } - - return x; -} - -} // namespace math -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index 68d05fc215..18494e470a 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/math/softmax_function.h" +#include "paddle/operators/math/softmax.h" namespace paddle { namespace operators { diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index a0941bb624..3dd21279ad 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -23,16 +23,32 @@ class SoftmaxWithCrossEntropyOpMaker SoftmaxWithCrossEntropyOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { + //(TODO caoying) replace int with boolean + AddAttr("soft_label", + "(int, default 0), A flag to indicate whether to interpretate " + "the given labels as soft labels.") + .SetDefault(0); AddInput("Logits", - "The unscaled log probabilities which is a 2-D tensor with" - "shape [N x K]. N is the batch_size, and K is the class number.") + "(Tensor, default Tensor), The unscaled log probabilities " + "which is a 2-D tensor with shape [N x K]. N is the batch_size, " + "and K is the class number.") .NotInGradient(); - AddInput("Label", "The ground truth. A 1-D tensor with shape N."); - AddOutput("Softmax", - "Store the outputs of softmax function, " - "which will be used in backward calculation.") + AddInput( + "Label", + "(Tensor, default Tensor), The ground truth which is " + "a 1-D or 2-D tensor. " + "If soft_label is set to 0, Label is a Tensor with shape [N x 1]. " + "If soft_label is set to 1, Label is a Tensor " + "with shape [N x K]."); + AddOutput( + "Softmax", + "(Tensor, default Tensor), A 2-D tensor with shape [N x K]. " + "The outputs value of softmax activation by given the input batch, " + "which will be used in backward calculation.") .AsIntermediate(); - AddOutput("Out", "A 1-D tensor with shape N."); + AddOutput("Loss", + "(Tensor, default Tensor), A 1-D tensor. The cross " + "entropy loss with shape [N x 1]."); AddComment(R"DOC( Cross entropy loss with softmax are used as the output layer extensively. This operator computes the softmax normalized values for each row of the input @@ -46,25 +62,18 @@ which will produce incorrect results. This operators expects mutually exclusive hard labels, each sample in a batch is in exactly one class with probabilities 1. 
Each sample in the batch with one and only one label. -)DOC"); - } -}; -class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; +Equation: - protected: - void InferShape(const framework::InferShapeContext& ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), - "Input(Out@Grad) should not be null"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Softmax"), - "Input(Softmax) should be not null."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"), - "Input(Lable) should be not null."); +1) hard label (one-hot label) - ctx.Output(framework::GradVarName("Logits")) - ->Resize(ctx.Input("Softmax")->dims()); +Loss_j = -\text{Logit}_{Label_j} + \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right), j = 1, ..., K + +2) soft label (a distribution over all classes) + +Loss_j = -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i-\log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right), j = 1,...,K + +)DOC"); } }; @@ -82,7 +91,25 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { "The label should be a 1-d tensor."); ctx.Output("Softmax")->Resize(logits->dims()); - ctx.Output("Out")->Resize({logits->dims()[0], 1}); + ctx.Output("Loss")->Resize({logits->dims()[0], 1}); + } +}; + +class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Loss")), + "Input(Loss@Grad) should not be null"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Softmax"), + "Input(Softmax) should be not null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"), + "Input(Lable) should be not null."); + + ctx.Output(framework::GradVarName("Logits")) + ->Resize(ctx.Input("Softmax")->dims()); } }; diff --git a/paddle/operators/softmax_with_cross_entropy_op.cu b/paddle/operators/softmax_with_cross_entropy_op.cu index 5af6a521a8..68bb85fa8a 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/operators/softmax_with_cross_entropy_op.cu @@ -13,9 +13,10 @@ limitations under the License. 
*/ #define EIGEN_USE_GPU + #include "paddle/framework/op_registry.h" -#include "paddle/operators/math/softmax_function.h" -#include "paddle/operators/math/utils.h" +#include "paddle/operators/cross_entropy_op.h" +#include "paddle/operators/math/softmax.h" namespace paddle { namespace operators { @@ -27,9 +28,10 @@ __global__ void CrossEntropyKernel(T* out, const T* softmax_out, const int* label, const int batch_size, const int class_num) { int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i >= batch_size) return; - PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num); - out[i] = -math::tolerable_value(log(softmax_out[i * class_num + label[i]])); + if (i < batch_size) { + PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num); + out[i] = -tolerable_value(std::log(softmax_out[i * class_num + label[i]])); + } } template @@ -38,10 +40,10 @@ __global__ void CrossEntropyWithSoftmaxGradKernel(T* softmax_out, const int batch_size, const int class_num) { int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i >= batch_size) return; - - PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num); - softmax_out[i * class_num + label[i]] -= 1.; + if (i < batch_size) { + PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num); + softmax_out[i * class_num + label[i]] -= 1.; + } } template @@ -60,7 +62,7 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel { // Calculate the cross entropy loss based on hard labels. const int* label_data = context.Input("Label")->data(); - Tensor* loss = context.Output("Out"); + Tensor* loss = context.Output("Loss"); loss->mutable_data(context.GetPlace()); T* loss_data = loss->data(); diff --git a/paddle/operators/softmax_with_cross_entropy_op.h b/paddle/operators/softmax_with_cross_entropy_op.h index 38b92a0bcd..0ad48dae2c 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.h +++ b/paddle/operators/softmax_with_cross_entropy_op.h @@ -15,8 +15,8 @@ #pragma once #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/math/softmax_function.h" -#include "paddle/operators/math/utils.h" +#include "paddle/operators/cross_entropy_op.h" +#include "paddle/operators/math/softmax.h" namespace paddle { namespace operators { @@ -44,7 +44,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { T* softmax_out = softmax->data(); const int* label_data = context.Input("Label")->data(); - Tensor* loss = context.Output("Out"); + Tensor* loss = context.Output("Loss"); loss->mutable_data(context.GetPlace()); T* loss_data = loss->data(); @@ -53,7 +53,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { for (int i = 0; i < batch_size; ++i) { int index = i * class_num + label_data[i]; - loss_data[i] = -math::tolerable_value(std::log(softmax_out[index])); + loss_data[i] = -tolerable_value(std::log(softmax_out[index])); } } }; diff --git a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py index e965dd0482..9c9ee77b73 100644 --- a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py @@ -25,13 +25,13 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): dtype="float32") self.inputs = {"Logits": logits, "Label": labels} - self.outputs = {"Softmax": softmax, "Out": cross_entropy} + self.outputs = {"Softmax": softmax, "Loss": cross_entropy} def test_check_output(self): self.check_output() def test_check_grad(self): - 
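        # check_grad numerically perturbs the Logits input and compares the
        # finite-difference gradient of Loss against the gradient computed by
        # the operator's backward kernel, allowing up to 5% relative error.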
self.check_grad(["Logits"], "Out", max_relative_error=0.05) + self.check_grad(["Logits"], "Loss", max_relative_error=0.05) if __name__ == "__main__": From 859dba591baaac50656fe3e25ab5a50a17445fc5 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 21 Sep 2017 20:13:07 -0700 Subject: [PATCH 08/51] Init commit --- paddle/pybind/pybind.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index c7009a604f..90b995decb 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -315,6 +315,23 @@ All parameter, weight, gradient are variables in Paddle. m.def("is_compile_gpu", IsCompileGPU); + py::class_(m, "ProgramDesc", "") + .def_static("instance", [] { return &GetProgramDesc(); }) + .def("append_block", [](ProgramDesc &self) { + auto desc = self.mutable_blocks()->Add(); + desc->set_idx(self.mutable_blocks()->size() - 1); + return desc; + }); + py::class_(m, "BlockDesc", "") + .def("idx", [](BlockDesc &self) { return self.idx(); }) + .def("set_parent", + [](BlockDesc &self, int32_t idx) { self.set_parent_idx(idx); }) + .def("parent", [](BlockDesc &self) { return self.parent_idx(); }); + + py::class_(m, "VarDesc", ""); + + py::class_(m, "OpDesc", ""); + return m.ptr(); } } // namespace framework From 7788b4605500d22ac31359115b3d341faa610080 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Thu, 21 Sep 2017 21:13:14 -0700 Subject: [PATCH 09/51] Expose VarDesc interface --- paddle/pybind/pybind.cc | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 90b995decb..e1f7bc8672 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -328,7 +328,31 @@ All parameter, weight, gradient are variables in Paddle. 
[](BlockDesc &self, int32_t idx) { self.set_parent_idx(idx); }) .def("parent", [](BlockDesc &self) { return self.parent_idx(); }); - py::class_(m, "VarDesc", ""); + py::class_(m, "VarDesc", "") + .def(py::init<>()) + .def("set_name", + [](VarDesc &self, const std::string &name) { self.set_name(name); }) + .def("set_shape", + [](VarDesc &self, const std::vector &dims) { + LoDTensorDesc *lod_tensor_desc = self.mutable_lod_tensor(); + for (const int64_t &i : dims) { + lod_tensor_desc->add_dims(i); + } + }) + .def("set_data_type", + [](VarDesc &self, int type_id) { + LoDTensorDesc *lod_tensor_desc = self.mutable_lod_tensor(); + lod_tensor_desc->set_data_type(static_cast(type_id)); + }) + .def("shape", [](VarDesc &self) { + const LoDTensorDesc &lod_tensor_desc = self.lod_tensor(); + int rank = lod_tensor_desc.dims_size(); + std::vector res(rank); + for (int i = 0; i < rank; ++i) { + res[i] = lod_tensor_desc.dims(i); + } + return res; + }); py::class_(m, "OpDesc", ""); From 70f398e2074b84701dc9b1b16f518ed9b9b16b62 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 21 Sep 2017 21:17:47 -0700 Subject: [PATCH 10/51] Update --- paddle/framework/attribute.cc | 3 + paddle/pybind/pybind.cc | 74 ++++++++++++++++--- .../v2/framework/tests/test_protobuf_descs.py | 16 ++++ 3 files changed, 83 insertions(+), 10 deletions(-) create mode 100644 python/paddle/v2/framework/tests/test_protobuf_descs.py diff --git a/paddle/framework/attribute.cc b/paddle/framework/attribute.cc index 159ed03b92..0a305e8a8c 100644 --- a/paddle/framework/attribute.cc +++ b/paddle/framework/attribute.cc @@ -24,6 +24,9 @@ static ProgramDesc* g_program_desc = nullptr; ProgramDesc& GetProgramDesc() { if (g_program_desc == nullptr) { g_program_desc = new ProgramDesc(); + auto root_block = g_program_desc->mutable_blocks()->Add(); + root_block->set_idx(0); + root_block->set_parent_idx(-1); } return *g_program_desc; } diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 90b995decb..835ea85aa1 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -316,21 +316,75 @@ All parameter, weight, gradient are variables in Paddle. 
m.def("is_compile_gpu", IsCompileGPU); py::class_(m, "ProgramDesc", "") - .def_static("instance", [] { return &GetProgramDesc(); }) - .def("append_block", [](ProgramDesc &self) { - auto desc = self.mutable_blocks()->Add(); - desc->set_idx(self.mutable_blocks()->size() - 1); - return desc; - }); + .def_static("instance", + [] { return &GetProgramDesc(); }, + py::return_value_policy::reference) + .def("append_block", + [](ProgramDesc &self, BlockDesc &parent) { + auto desc = self.mutable_blocks()->Add(); + desc->set_idx(self.mutable_blocks()->size() - 1); + desc->set_parent_idx(parent.idx()); + return desc; + }) + .def("root_block", + [](ProgramDesc &self) { return self.mutable_blocks()[0]; }); py::class_(m, "BlockDesc", "") .def("idx", [](BlockDesc &self) { return self.idx(); }) - .def("set_parent", - [](BlockDesc &self, int32_t idx) { self.set_parent_idx(idx); }) - .def("parent", [](BlockDesc &self) { return self.parent_idx(); }); + .def("parent", [](BlockDesc &self) { return self.parent_idx(); }) + .def("append_op", + [](BlockDesc &self) { return self.mutable_ops()->Add(); }); py::class_(m, "VarDesc", ""); - py::class_(m, "OpDesc", ""); + auto op_desc_set_var = [](OpDesc::Var *var, + const std::string ¶meter, + const std::vector &arguments) { + var->set_parameter(parameter); + auto args = var->mutable_arguments(); + args->Reserve(static_cast(arguments.size())); + for (auto &arg : arguments) { + *args->Add() = arg; + } + }; + + auto op_desc_set_attr = [](OpDesc &desc, const std::string &name) { + auto attr = desc.mutable_attrs()->Add(); + attr->set_name(name); + return attr; + }; + + py::class_(m, "OpDesc", "") + .def("type", [](OpDesc &op) { return op.type(); }) + .def("set_input", + [op_desc_set_var](OpDesc &self, + const std::string ¶meter, + const std::vector &arguments) { + auto ipt = self.mutable_inputs()->Add(); + op_desc_set_var(ipt, parameter, arguments); + }) + .def("input_names", + [](OpDesc &self) { + std::vector ret_val; + ret_val.reserve(static_cast(self.inputs().size())); + std::transform( + self.inputs().begin(), + self.inputs().end(), + std::back_inserter(ret_val), + [](const OpDesc::Var &var) { return var.parameter(); }); + return ret_val; + }) + .def("__str__", [](OpDesc &self) { return self.DebugString(); }) + .def("set_output", + [op_desc_set_var](OpDesc &self, + const std::string ¶meter, + const std::vector &arguments) { + auto opt = self.mutable_outputs()->Add(); + op_desc_set_var(opt, parameter, arguments); + }) + .def("set_attr", + [op_desc_set_attr](OpDesc &self, const std::string &name, int i) { + op_desc_set_attr(self, name)->set_i(i); + }); return m.ptr(); } diff --git a/python/paddle/v2/framework/tests/test_protobuf_descs.py b/python/paddle/v2/framework/tests/test_protobuf_descs.py new file mode 100644 index 0000000000..945610ff45 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_protobuf_descs.py @@ -0,0 +1,16 @@ +import unittest +import paddle.v2.framework.core as core + + +class TestProgramDesc(unittest.TestCase): + def test_instance(self): + program_desc = core.ProgramDesc.instance() + self.assertIsNotNone(program_desc) + del program_desc + program_desc = core.ProgramDesc.instance() + self.assertIsNotNone(program_desc) + del program_desc + + +if __name__ == '__main__': + unittest.main() From 332369ca5c72fbe88f2504f71285b25247cc966e Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Thu, 21 Sep 2017 21:32:42 -0700 Subject: [PATCH 11/51] Add `new_var` for BlockDesc --- paddle/pybind/pybind.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 
deletions(-) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index cbb7b1cbff..fa10c8e472 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -321,18 +321,19 @@ All parameter, weight, gradient are variables in Paddle. py::return_value_policy::reference) .def("append_block", [](ProgramDesc &self, BlockDesc &parent) { - auto desc = self.mutable_blocks()->Add(); + auto desc = self.add_blocks(); desc->set_idx(self.mutable_blocks()->size() - 1); desc->set_parent_idx(parent.idx()); return desc; }) .def("root_block", [](ProgramDesc &self) { return self.mutable_blocks()[0]; }); + py::class_(m, "BlockDesc", "") .def("idx", [](BlockDesc &self) { return self.idx(); }) .def("parent", [](BlockDesc &self) { return self.parent_idx(); }) - .def("append_op", - [](BlockDesc &self) { return self.mutable_ops()->Add(); }); + .def("append_op", [](BlockDesc &self) { return self.add_ops(); }) + .def("new_var", [](BlockDesc &self) { return self.add_vars(); }); py::class_(m, "VarDesc", "") .def(py::init<>()) @@ -372,7 +373,7 @@ All parameter, weight, gradient are variables in Paddle. }; auto op_desc_set_attr = [](OpDesc &desc, const std::string &name) { - auto attr = desc.mutable_attrs()->Add(); + auto attr = desc.add_attrs(); attr->set_name(name); return attr; }; @@ -383,7 +384,7 @@ All parameter, weight, gradient are variables in Paddle. [op_desc_set_var](OpDesc &self, const std::string ¶meter, const std::vector &arguments) { - auto ipt = self.mutable_inputs()->Add(); + auto ipt = self.add_inputs(); op_desc_set_var(ipt, parameter, arguments); }) .def("input_names", @@ -402,7 +403,7 @@ All parameter, weight, gradient are variables in Paddle. [op_desc_set_var](OpDesc &self, const std::string ¶meter, const std::vector &arguments) { - auto opt = self.mutable_outputs()->Add(); + auto opt = self.add_outputs(); op_desc_set_var(opt, parameter, arguments); }) .def("set_attr", From 618884dd69af0f2e7ea7c0527ec2ba8131ec5a07 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 21 Sep 2017 21:39:42 -0700 Subject: [PATCH 12/51] Complete unittest for ProgramDesc --- paddle/pybind/pybind.cc | 21 +++++++++++++++---- .../v2/framework/tests/test_protobuf_descs.py | 9 ++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index cbb7b1cbff..cae3671350 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -319,17 +319,30 @@ All parameter, weight, gradient are variables in Paddle. 
.def_static("instance", [] { return &GetProgramDesc(); }, py::return_value_policy::reference) + .def_static("__create_program_desc__", + [] { + // Only used for unit-test + auto *prog_desc = new ProgramDesc; + auto *block = prog_desc->mutable_blocks()->Add(); + block->set_idx(0); + block->set_parent_idx(-1); + return prog_desc; + }) .def("append_block", [](ProgramDesc &self, BlockDesc &parent) { - auto desc = self.mutable_blocks()->Add(); + auto desc = self.add_blocks(); desc->set_idx(self.mutable_blocks()->size() - 1); desc->set_parent_idx(parent.idx()); return desc; - }) + }, + py::return_value_policy::reference) .def("root_block", - [](ProgramDesc &self) { return self.mutable_blocks()[0]; }); + [](ProgramDesc &self) { return self.mutable_blocks()->Mutable(0); }, + py::return_value_policy::reference) + .def("__str__", [](ProgramDesc &self) { return self.DebugString(); }); + py::class_(m, "BlockDesc", "") - .def("idx", [](BlockDesc &self) { return self.idx(); }) + .def("id", [](BlockDesc &self) { return self.idx(); }) .def("parent", [](BlockDesc &self) { return self.parent_idx(); }) .def("append_op", [](BlockDesc &self) { return self.mutable_ops()->Add(); }); diff --git a/python/paddle/v2/framework/tests/test_protobuf_descs.py b/python/paddle/v2/framework/tests/test_protobuf_descs.py index 945610ff45..8e94843662 100644 --- a/python/paddle/v2/framework/tests/test_protobuf_descs.py +++ b/python/paddle/v2/framework/tests/test_protobuf_descs.py @@ -9,8 +9,17 @@ class TestProgramDesc(unittest.TestCase): del program_desc program_desc = core.ProgramDesc.instance() self.assertIsNotNone(program_desc) + self.assertIsNotNone(program_desc.root_block()) del program_desc + def test_append_block(self): + prog_desc = core.ProgramDesc.__create_program_desc__() + self.assertIsNotNone(prog_desc) + block1 = prog_desc.append_block(prog_desc.root_block()) + block2 = prog_desc.append_block(block1) + self.assertEqual(block1.id(), block2.parent()) + self.assertEqual(prog_desc.root_block().id(), block1.parent()) + if __name__ == '__main__': unittest.main() From b154c0e51ea1853ab935bf6c86966b0092babe6a Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 22 Sep 2017 10:07:17 -0700 Subject: [PATCH 13/51] Update --- paddle/pybind/pybind.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index a6b583821f..543dbb739e 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -344,8 +344,12 @@ All parameter, weight, gradient are variables in Paddle. 
py::class_(m, "BlockDesc", "") .def("id", [](BlockDesc &self) { return self.idx(); }) .def("parent", [](BlockDesc &self) { return self.parent_idx(); }) - .def("append_op", [](BlockDesc &self) { return self.add_ops(); }) - .def("new_var", [](BlockDesc &self) { return self.add_vars(); }); + .def("append_op", + [](BlockDesc &self) { return self.add_ops(); }, + py::return_value_policy::reference) + .def("new_var", + [](BlockDesc &self) { return self.add_vars(); }, + py::return_value_policy::reference); py::class_(m, "VarDesc", "") .def(py::init<>()) From ee547f6ac984b8880394acceb6fbec856f6a2dde Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 22 Sep 2017 10:37:34 -0700 Subject: [PATCH 14/51] Add unittests --- .../v2/framework/tests/test_protobuf_descs.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_protobuf_descs.py b/python/paddle/v2/framework/tests/test_protobuf_descs.py index 8e94843662..71bdca8765 100644 --- a/python/paddle/v2/framework/tests/test_protobuf_descs.py +++ b/python/paddle/v2/framework/tests/test_protobuf_descs.py @@ -15,10 +15,25 @@ class TestProgramDesc(unittest.TestCase): def test_append_block(self): prog_desc = core.ProgramDesc.__create_program_desc__() self.assertIsNotNone(prog_desc) - block1 = prog_desc.append_block(prog_desc.root_block()) + block_root = prog_desc.root_block() + self.assertEqual(block_root.id(), 0) + block1 = prog_desc.append_block(block_root) block2 = prog_desc.append_block(block1) self.assertEqual(block1.id(), block2.parent()) - self.assertEqual(prog_desc.root_block().id(), block1.parent()) + self.assertEqual(block_root.id(), block1.parent()) + block3 = prog_desc.append_block(block_root) + self.assertEqual(block3.parent(), block_root.id()) + + +class TestVarDesc(unittest.TestCase): + def test_shape(self): + program_desc = core.ProgramDesc.instance() + block = program_desc.root_block() + var = block.new_var() + src_shape = [3, 2, 10, 8] + var.set_shape(src_shape) + res_shape = var.shape() + self.assertEqual(src_shape, res_shape) if __name__ == '__main__': From 17d93f4a04fa03517d84d20fd31829a7e02847b4 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 22 Sep 2017 10:38:04 -0700 Subject: [PATCH 15/51] Add Helper for Vector2Repeated --- paddle/pybind/pybind.cc | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index a6b583821f..74f5904034 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -53,6 +53,25 @@ bool IsCompileGPU() { #endif } +template +inline std::vector RepeatedToVector( + const google::protobuf::RepeatedField &repeated_field) { + std::vector ret; + ret.reserve(repeated_field.size()); + std::copy( + repeated_field.begin(), repeated_field.end(), std::back_inserter(ret)); + return ret; +} + +template +inline void VectorToRepeated(const std::vector &vec, + RepeatedField *repeated_field) { + repeated_field->Reserve(vec.size()); + for (auto &elem : vec) { + *repeated_field->Add() = elem; + } +} + PYBIND11_PLUGIN(core) { py::module m("core", "C++ core of PaddlePaddle"); @@ -377,11 +396,7 @@ All parameter, weight, gradient are variables in Paddle. 
const std::string ¶meter, const std::vector &arguments) { var->set_parameter(parameter); - auto args = var->mutable_arguments(); - args->Reserve(static_cast(arguments.size())); - for (auto &arg : arguments) { - *args->Add() = arg; - } + VectorToRepeated(arguments, var->mutable_arguments()); }; auto op_desc_set_attr = [](OpDesc &desc, const std::string &name) { From e29003669ff30272070bb8513fb95c2042c305b9 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 22 Sep 2017 11:23:47 -0700 Subject: [PATCH 16/51] Moving protobuf binding code to protobuf module --- paddle/pybind/CMakeLists.txt | 3 +- paddle/pybind/protobuf.cc | 136 ++++++++++++++++++++++++++++++++++ paddle/pybind/protobuf.h | 54 ++++++++++++++ paddle/pybind/pybind.cc | 140 ++--------------------------------- 4 files changed, 197 insertions(+), 136 deletions(-) create mode 100644 paddle/pybind/protobuf.cc create mode 100644 paddle/pybind/protobuf.h diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index 4f05406c7f..a1d7483973 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1,6 +1,7 @@ if(WITH_PYTHON) + cc_library(proto_bind SRCS protobuf.cc) cc_library(paddle_pybind SHARED SRCS pybind.cc - DEPS pybind python backward + DEPS proto_bind pybind python backward ${GLOB_OP_LIB}) endif(WITH_PYTHON) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc new file mode 100644 index 0000000000..91f4c7d7c8 --- /dev/null +++ b/paddle/pybind/protobuf.cc @@ -0,0 +1,136 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/pybind/protobuf.h" + +namespace paddle { +namespace framework { + +void bind_program_desc(py::module &m) { + py::class_(m, "ProgramDesc", "") + .def_static("instance", + [] { return &GetProgramDesc(); }, + py::return_value_policy::reference) + .def_static("__create_program_desc__", + [] { + // Only used for unit-test + auto *prog_desc = new ProgramDesc; + auto *block = prog_desc->mutable_blocks()->Add(); + block->set_idx(0); + block->set_parent_idx(-1); + return prog_desc; + }) + .def("append_block", + [](ProgramDesc &self, BlockDesc &parent) { + auto desc = self.add_blocks(); + desc->set_idx(self.mutable_blocks()->size() - 1); + desc->set_parent_idx(parent.idx()); + return desc; + }, + py::return_value_policy::reference) + .def("root_block", + [](ProgramDesc &self) { return self.mutable_blocks()->Mutable(0); }, + py::return_value_policy::reference) + .def("__str__", [](ProgramDesc &self) { return self.DebugString(); }); +} + +void bind_block_desc(py::module &m) { + py::class_(m, "BlockDesc", "") + .def("id", [](BlockDesc &self) { return self.idx(); }) + .def("parent", [](BlockDesc &self) { return self.parent_idx(); }) + .def("append_op", + [](BlockDesc &self) { return self.add_ops(); }, + py::return_value_policy::reference) + .def("new_var", + [](BlockDesc &self) { return self.add_vars(); }, + py::return_value_policy::reference); +} + +void bind_var_dses(py::module &m) { + py::class_(m, "VarDesc", "") + .def(py::init<>()) + .def("set_name", + [](VarDesc &self, const std::string &name) { self.set_name(name); }) + .def("set_shape", + [](VarDesc &self, const std::vector &dims) { + LoDTensorDesc *lod_tensor_desc = self.mutable_lod_tensor(); + for (const int64_t &i : dims) { + lod_tensor_desc->add_dims(i); + } + }) + .def("set_data_type", + [](VarDesc &self, int type_id) { + LoDTensorDesc *lod_tensor_desc = self.mutable_lod_tensor(); + lod_tensor_desc->set_data_type(static_cast(type_id)); + }) + .def("shape", [](VarDesc &self) { + const LoDTensorDesc &lod_tensor_desc = self.lod_tensor(); + int rank = lod_tensor_desc.dims_size(); + std::vector res(rank); + for (int i = 0; i < rank; ++i) { + res[i] = lod_tensor_desc.dims(i); + } + return res; + }); +} + +void bind_op_desc(py::module &m) { + auto op_desc_set_var = [](OpDesc::Var *var, + const std::string ¶meter, + const std::vector &arguments) { + var->set_parameter(parameter); + VectorToRepeated(arguments, var->mutable_arguments()); + }; + + auto op_desc_set_attr = [](OpDesc &desc, const std::string &name) { + auto attr = desc.add_attrs(); + attr->set_name(name); + return attr; + }; + + py::class_(m, "OpDesc", "") + .def("type", [](OpDesc &op) { return op.type(); }) + .def("set_input", + [op_desc_set_var](OpDesc &self, + const std::string ¶meter, + const std::vector &arguments) { + auto ipt = self.add_inputs(); + op_desc_set_var(ipt, parameter, arguments); + }) + .def("input_names", + [](OpDesc &self) { + std::vector ret_val; + ret_val.reserve(static_cast(self.inputs().size())); + std::transform( + self.inputs().begin(), + self.inputs().end(), + std::back_inserter(ret_val), + [](const OpDesc::Var &var) { return var.parameter(); }); + return ret_val; + }) + .def("__str__", [](OpDesc &self) { return self.DebugString(); }) + .def("set_output", + [op_desc_set_var](OpDesc &self, + const std::string ¶meter, + const std::vector &arguments) { + auto opt = self.add_outputs(); + op_desc_set_var(opt, parameter, arguments); + }) + .def("set_attr", + [op_desc_set_attr](OpDesc &self, const std::string &name, int i) { + 
op_desc_set_attr(self, name)->set_i(i); + }); +} +} // namespace framework +} // namespace paddle diff --git a/paddle/pybind/protobuf.h b/paddle/pybind/protobuf.h new file mode 100644 index 0000000000..ff4813cce7 --- /dev/null +++ b/paddle/pybind/protobuf.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include "paddle/framework/op_registry.h" +#include "pybind11/numpy.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace py = pybind11; + +namespace paddle { +namespace framework { + +template +inline std::vector RepeatedToVector( + const google::protobuf::RepeatedField& repeated_field) { + std::vector ret; + ret.reserve(repeated_field.size()); + std::copy( + repeated_field.begin(), repeated_field.end(), std::back_inserter(ret)); + return ret; +} + +template +inline void VectorToRepeated(const std::vector& vec, + RepeatedField* repeated_field) { + repeated_field->Reserve(vec.size()); + for (auto& elem : vec) { + *repeated_field->Add() = elem; + } +} + +void bind_program_desc(py::module& m); +void bind_block_desc(py::module& m); +void bind_var_dses(py::module& m); +void bind_op_desc(py::module& m); +} // namespace framework +} // namespace paddle diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 5ccc8c377f..10c6670e00 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -12,13 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include -#include +#include "paddle/pybind/protobuf.h" #include "paddle/framework/backward.h" #include "paddle/framework/lod_tensor.h" -#include "paddle/framework/op_registry.h" #include "paddle/operators/cond_op.h" #include "paddle/operators/net_op.h" #include "paddle/operators/recurrent_op.h" @@ -27,11 +24,6 @@ limitations under the License. */ #include "paddle/pybind/pybind.h" #include "paddle/pybind/tensor_py.h" #include "paddle/string/to_string.h" -#include "pybind11/numpy.h" -#include "pybind11/pybind11.h" -#include "pybind11/stl.h" - -namespace py = pybind11; namespace paddle { namespace framework { @@ -53,25 +45,6 @@ bool IsCompileGPU() { #endif } -template -inline std::vector RepeatedToVector( - const google::protobuf::RepeatedField &repeated_field) { - std::vector ret; - ret.reserve(repeated_field.size()); - std::copy( - repeated_field.begin(), repeated_field.end(), std::back_inserter(ret)); - return ret; -} - -template -inline void VectorToRepeated(const std::vector &vec, - RepeatedField *repeated_field) { - repeated_field->Reserve(vec.size()); - for (auto &elem : vec) { - *repeated_field->Add() = elem; - } -} - PYBIND11_PLUGIN(core) { py::module m("core", "C++ core of PaddlePaddle"); @@ -334,113 +307,10 @@ All parameter, weight, gradient are variables in Paddle. 
m.def("is_compile_gpu", IsCompileGPU); - py::class_(m, "ProgramDesc", "") - .def_static("instance", - [] { return &GetProgramDesc(); }, - py::return_value_policy::reference) - .def_static("__create_program_desc__", - [] { - // Only used for unit-test - auto *prog_desc = new ProgramDesc; - auto *block = prog_desc->mutable_blocks()->Add(); - block->set_idx(0); - block->set_parent_idx(-1); - return prog_desc; - }) - .def("append_block", - [](ProgramDesc &self, BlockDesc &parent) { - auto desc = self.add_blocks(); - desc->set_idx(self.mutable_blocks()->size() - 1); - desc->set_parent_idx(parent.idx()); - return desc; - }, - py::return_value_policy::reference) - .def("root_block", - [](ProgramDesc &self) { return self.mutable_blocks()->Mutable(0); }, - py::return_value_policy::reference) - .def("__str__", [](ProgramDesc &self) { return self.DebugString(); }); - - py::class_(m, "BlockDesc", "") - .def("id", [](BlockDesc &self) { return self.idx(); }) - .def("parent", [](BlockDesc &self) { return self.parent_idx(); }) - .def("append_op", - [](BlockDesc &self) { return self.add_ops(); }, - py::return_value_policy::reference) - .def("new_var", - [](BlockDesc &self) { return self.add_vars(); }, - py::return_value_policy::reference); - - py::class_(m, "VarDesc", "") - .def(py::init<>()) - .def("set_name", - [](VarDesc &self, const std::string &name) { self.set_name(name); }) - .def("set_shape", - [](VarDesc &self, const std::vector &dims) { - LoDTensorDesc *lod_tensor_desc = self.mutable_lod_tensor(); - for (const int64_t &i : dims) { - lod_tensor_desc->add_dims(i); - } - }) - .def("set_data_type", - [](VarDesc &self, int type_id) { - LoDTensorDesc *lod_tensor_desc = self.mutable_lod_tensor(); - lod_tensor_desc->set_data_type(static_cast(type_id)); - }) - .def("shape", [](VarDesc &self) { - const LoDTensorDesc &lod_tensor_desc = self.lod_tensor(); - int rank = lod_tensor_desc.dims_size(); - std::vector res(rank); - for (int i = 0; i < rank; ++i) { - res[i] = lod_tensor_desc.dims(i); - } - return res; - }); - - auto op_desc_set_var = [](OpDesc::Var *var, - const std::string ¶meter, - const std::vector &arguments) { - var->set_parameter(parameter); - VectorToRepeated(arguments, var->mutable_arguments()); - }; - - auto op_desc_set_attr = [](OpDesc &desc, const std::string &name) { - auto attr = desc.add_attrs(); - attr->set_name(name); - return attr; - }; - - py::class_(m, "OpDesc", "") - .def("type", [](OpDesc &op) { return op.type(); }) - .def("set_input", - [op_desc_set_var](OpDesc &self, - const std::string ¶meter, - const std::vector &arguments) { - auto ipt = self.add_inputs(); - op_desc_set_var(ipt, parameter, arguments); - }) - .def("input_names", - [](OpDesc &self) { - std::vector ret_val; - ret_val.reserve(static_cast(self.inputs().size())); - std::transform( - self.inputs().begin(), - self.inputs().end(), - std::back_inserter(ret_val), - [](const OpDesc::Var &var) { return var.parameter(); }); - return ret_val; - }) - .def("__str__", [](OpDesc &self) { return self.DebugString(); }) - .def("set_output", - [op_desc_set_var](OpDesc &self, - const std::string ¶meter, - const std::vector &arguments) { - auto opt = self.add_outputs(); - op_desc_set_var(opt, parameter, arguments); - }) - .def("set_attr", - [op_desc_set_attr](OpDesc &self, const std::string &name, int i) { - op_desc_set_attr(self, name)->set_i(i); - }); + bind_program_desc(m); + bind_block_desc(m); + bind_var_dses(m); + bind_op_desc(m); return m.ptr(); } From 37fd8fa1b6ec75ac447a93bea990338550402baf Mon Sep 17 00:00:00 2001 From: 
fengjiayi Date: Fri, 22 Sep 2017 11:39:36 -0700 Subject: [PATCH 17/51] Fix typo --- paddle/pybind/protobuf.cc | 8 ++++---- paddle/pybind/protobuf.h | 8 ++++---- paddle/pybind/pybind.cc | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 91f4c7d7c8..47b3c43ebf 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace framework { -void bind_program_desc(py::module &m) { +void BindProgramDesc(py::module &m) { py::class_(m, "ProgramDesc", "") .def_static("instance", [] { return &GetProgramDesc(); }, @@ -45,7 +45,7 @@ void bind_program_desc(py::module &m) { .def("__str__", [](ProgramDesc &self) { return self.DebugString(); }); } -void bind_block_desc(py::module &m) { +void BindBlockDesc(py::module &m) { py::class_(m, "BlockDesc", "") .def("id", [](BlockDesc &self) { return self.idx(); }) .def("parent", [](BlockDesc &self) { return self.parent_idx(); }) @@ -57,7 +57,7 @@ void bind_block_desc(py::module &m) { py::return_value_policy::reference); } -void bind_var_dses(py::module &m) { +void BindVarDsec(py::module &m) { py::class_(m, "VarDesc", "") .def(py::init<>()) .def("set_name", @@ -85,7 +85,7 @@ void bind_var_dses(py::module &m) { }); } -void bind_op_desc(py::module &m) { +void BindOpDesc(py::module &m) { auto op_desc_set_var = [](OpDesc::Var *var, const std::string ¶meter, const std::vector &arguments) { diff --git a/paddle/pybind/protobuf.h b/paddle/pybind/protobuf.h index ff4813cce7..a32acfb038 100644 --- a/paddle/pybind/protobuf.h +++ b/paddle/pybind/protobuf.h @@ -46,9 +46,9 @@ inline void VectorToRepeated(const std::vector& vec, } } -void bind_program_desc(py::module& m); -void bind_block_desc(py::module& m); -void bind_var_dses(py::module& m); -void bind_op_desc(py::module& m); +void BindProgramDesc(py::module& m); +void BindBlockDesc(py::module& m); +void BindVarDsec(py::module& m); +void BindOpDesc(py::module& m); } // namespace framework } // namespace paddle diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 10c6670e00..d9dd7523bf 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -307,10 +307,10 @@ All parameter, weight, gradient are variables in Paddle. m.def("is_compile_gpu", IsCompileGPU); - bind_program_desc(m); - bind_block_desc(m); - bind_var_dses(m); - bind_op_desc(m); + BindProgramDesc(m); + BindBlockDesc(m); + BindVarDsec(m); + BindOpDesc(m); return m.ptr(); } From f5aa8b4d7ef508dcd66984ef36012eeff63a9c85 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 22 Sep 2017 13:22:21 -0700 Subject: [PATCH 18/51] Update namespace of pybind/protobuf.cc and .h --- paddle/pybind/protobuf.cc | 11 +++++++++-- paddle/pybind/protobuf.h | 4 ++-- .../paddle/v2/framework/tests/test_protobuf_descs.py | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 47b3c43ebf..bfbe177e8f 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -15,9 +15,10 @@ limitations under the License. 
*/ #include "paddle/pybind/protobuf.h" namespace paddle { -namespace framework { +namespace pybind { void BindProgramDesc(py::module &m) { + using namespace paddle::framework; // NOLINT py::class_(m, "ProgramDesc", "") .def_static("instance", [] { return &GetProgramDesc(); }, @@ -42,10 +43,14 @@ void BindProgramDesc(py::module &m) { .def("root_block", [](ProgramDesc &self) { return self.mutable_blocks()->Mutable(0); }, py::return_value_policy::reference) + .def("block", + [](ProgramDesc &self, int id) { return self.blocks(id); }, + py::return_value_policy::reference) .def("__str__", [](ProgramDesc &self) { return self.DebugString(); }); } void BindBlockDesc(py::module &m) { + using namespace paddle::framework; // NOLINT py::class_(m, "BlockDesc", "") .def("id", [](BlockDesc &self) { return self.idx(); }) .def("parent", [](BlockDesc &self) { return self.parent_idx(); }) @@ -58,6 +63,7 @@ void BindBlockDesc(py::module &m) { } void BindVarDsec(py::module &m) { + using namespace paddle::framework; // NOLINT py::class_(m, "VarDesc", "") .def(py::init<>()) .def("set_name", @@ -86,6 +92,7 @@ void BindVarDsec(py::module &m) { } void BindOpDesc(py::module &m) { + using namespace paddle::framework; // NOLINT auto op_desc_set_var = [](OpDesc::Var *var, const std::string ¶meter, const std::vector &arguments) { @@ -132,5 +139,5 @@ void BindOpDesc(py::module &m) { op_desc_set_attr(self, name)->set_i(i); }); } -} // namespace framework +} // namespace pybind } // namespace paddle diff --git a/paddle/pybind/protobuf.h b/paddle/pybind/protobuf.h index a32acfb038..de9a008e25 100644 --- a/paddle/pybind/protobuf.h +++ b/paddle/pybind/protobuf.h @@ -25,7 +25,7 @@ limitations under the License. */ namespace py = pybind11; namespace paddle { -namespace framework { +namespace pybind { template inline std::vector RepeatedToVector( @@ -50,5 +50,5 @@ void BindProgramDesc(py::module& m); void BindBlockDesc(py::module& m); void BindVarDsec(py::module& m); void BindOpDesc(py::module& m); -} // namespace framework +} // namespace pybind } // namespace paddle diff --git a/python/paddle/v2/framework/tests/test_protobuf_descs.py b/python/paddle/v2/framework/tests/test_protobuf_descs.py index 71bdca8765..d0192814ef 100644 --- a/python/paddle/v2/framework/tests/test_protobuf_descs.py +++ b/python/paddle/v2/framework/tests/test_protobuf_descs.py @@ -23,6 +23,7 @@ class TestProgramDesc(unittest.TestCase): self.assertEqual(block_root.id(), block1.parent()) block3 = prog_desc.append_block(block_root) self.assertEqual(block3.parent(), block_root.id()) + self.assertEqual(prog_desc.block(1).id(), 1) class TestVarDesc(unittest.TestCase): From 027fc62cb020801cef53fb0e753d3a31fb7e6f39 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 22 Sep 2017 13:47:29 -0700 Subject: [PATCH 19/51] Use Vec2Repeated Repeated2Vec --- paddle/pybind/protobuf.cc | 31 +++++++++++++++++++++---------- paddle/pybind/protobuf.h | 19 ------------------- 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index bfbe177e8f..b86185bf5b 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -17,6 +17,25 @@ limitations under the License. 
*/ namespace paddle { namespace pybind { +template +inline std::vector RepeatedToVector( + const google::protobuf::RepeatedField &repeated_field) { + std::vector ret; + ret.reserve(repeated_field.size()); + std::copy( + repeated_field.begin(), repeated_field.end(), std::back_inserter(ret)); + return ret; +} + +template +inline void VectorToRepeated(const std::vector &vec, + RepeatedField *repeated_field) { + repeated_field->Reserve(vec.size()); + for (auto &elem : vec) { + *repeated_field->Add() = elem; + } +} + void BindProgramDesc(py::module &m) { using namespace paddle::framework; // NOLINT py::class_(m, "ProgramDesc", "") @@ -70,10 +89,7 @@ void BindVarDsec(py::module &m) { [](VarDesc &self, const std::string &name) { self.set_name(name); }) .def("set_shape", [](VarDesc &self, const std::vector &dims) { - LoDTensorDesc *lod_tensor_desc = self.mutable_lod_tensor(); - for (const int64_t &i : dims) { - lod_tensor_desc->add_dims(i); - } + VectorToRepeated(dims, self.mutable_lod_tensor()->mutable_dims()); }) .def("set_data_type", [](VarDesc &self, int type_id) { @@ -82,12 +98,7 @@ void BindVarDsec(py::module &m) { }) .def("shape", [](VarDesc &self) { const LoDTensorDesc &lod_tensor_desc = self.lod_tensor(); - int rank = lod_tensor_desc.dims_size(); - std::vector res(rank); - for (int i = 0; i < rank; ++i) { - res[i] = lod_tensor_desc.dims(i); - } - return res; + return RepeatedToVector(lod_tensor_desc.dims()); }); } diff --git a/paddle/pybind/protobuf.h b/paddle/pybind/protobuf.h index de9a008e25..2721c128d1 100644 --- a/paddle/pybind/protobuf.h +++ b/paddle/pybind/protobuf.h @@ -27,25 +27,6 @@ namespace py = pybind11; namespace paddle { namespace pybind { -template -inline std::vector RepeatedToVector( - const google::protobuf::RepeatedField& repeated_field) { - std::vector ret; - ret.reserve(repeated_field.size()); - std::copy( - repeated_field.begin(), repeated_field.end(), std::back_inserter(ret)); - return ret; -} - -template -inline void VectorToRepeated(const std::vector& vec, - RepeatedField* repeated_field) { - repeated_field->Reserve(vec.size()); - for (auto& elem : vec) { - *repeated_field->Add() = elem; - } -} - void BindProgramDesc(py::module& m); void BindBlockDesc(py::module& m); void BindVarDsec(py::module& m); From bddb40609d604cd68f6418423147ec1ec5ec8de0 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 22 Sep 2017 15:35:48 -0700 Subject: [PATCH 20/51] Buggy code --- paddle/pybind/protobuf.cc | 285 ++++++++++++------ .../v2/framework/tests/test_protobuf_descs.py | 21 +- 2 files changed, 214 insertions(+), 92 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index b86185bf5b..b4ed9c4335 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -13,10 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/pybind/protobuf.h" +#include namespace paddle { namespace pybind { +using namespace paddle::framework; // NOLINT + template inline std::vector RepeatedToVector( const google::protobuf::RepeatedField &repeated_field) { @@ -36,45 +39,154 @@ inline void VectorToRepeated(const std::vector &vec, } } +class ProgramDescBind; +class OpDescBind; +class BlockDescBind; + +class OpDescBind { +public: + explicit OpDescBind(BlockDescBind *block) : block_(block) {} + + operator OpDesc *() { return &op_desc_; } + +private: + BlockDescBind *block_; + OpDesc op_desc_; +}; + +class BlockDescBind { +public: + BlockDescBind(ProgramDescBind *prog, BlockDesc *desc) + : prog_(prog), desc_(desc), need_update_(false) {} + + ~BlockDescBind() { + std::cerr << "dtor " << this << "," << desc_ << std::endl; + } + + int32_t id() const { + std::cerr << "desc ptr " << desc_ << std::endl; + return desc_->idx(); + } + + int32_t Parent() const { return desc_->parent_idx(); } + + OpDescBind *AppendOp() { + need_update_ = true; + ops_.emplace_back(this); + return &ops_.back(); + } + + void Sync() { + if (need_update_) { + auto &op_field = *this->desc_->mutable_ops(); + op_field.Clear(); + op_field.Reserve(static_cast(ops_.size())); + for (auto &op_desc : ops_) { + op_field.AddAllocated(op_desc); + } + } + } + +private: + ProgramDescBind *prog_; // not_own + BlockDesc *desc_; // not_own + bool need_update_; + + std::deque ops_; +}; + +using ProgDescMap = + std::unordered_map>; +static ProgDescMap *g_bind_map = nullptr; + +class ProgramDescBind { +public: + static ProgramDescBind &Instance(ProgramDesc *prog) { + if (g_bind_map == nullptr) { + g_bind_map = new ProgDescMap(); + } + auto &map = *g_bind_map; + auto &ptr = map[prog]; + + if (ptr == nullptr) { + ptr.reset(new ProgramDescBind(prog)); + } + return *ptr; + } + + BlockDescBind *AppendBlock(BlockDescBind *parent) { + auto *b = prog_->add_blocks(); + std::cerr << "block ptr " << b << std::endl; + std::cerr << "pass ptr " << parent << std::endl; + b->set_parent_idx(parent->id()); + b->set_idx(prog_->blocks_size() - 1); + blocks_.emplace_back(this, b); + return &blocks_.back(); + } + + BlockDescBind *Root() { return &blocks_.front(); } + + BlockDescBind *Block(size_t idx) { return &blocks_[idx]; } + + std::string DebugString() { return Proto()->DebugString(); } + + size_t Size() const { return blocks_.size(); } + + ProgramDesc *Proto() { + for (auto &block : blocks_) { + block.Sync(); + } + return prog_; + } + +private: + explicit ProgramDescBind(ProgramDesc *prog) : prog_(prog) { + for (auto &block : *prog->mutable_blocks()) { + blocks_.emplace_back(this, &block); + } + } + + // Not owned + ProgramDesc *prog_; + + std::vector blocks_; +}; + void BindProgramDesc(py::module &m) { - using namespace paddle::framework; // NOLINT - py::class_(m, "ProgramDesc", "") + py::class_(m, "ProgramDesc", "") .def_static("instance", - [] { return &GetProgramDesc(); }, + []() -> ProgramDescBind * { + return &ProgramDescBind::Instance(&GetProgramDesc()); + }, py::return_value_policy::reference) .def_static("__create_program_desc__", - [] { + []() -> ProgramDescBind * { // Only used for unit-test auto *prog_desc = new ProgramDesc; auto *block = prog_desc->mutable_blocks()->Add(); block->set_idx(0); block->set_parent_idx(-1); - return prog_desc; - }) + return &ProgramDescBind::Instance(prog_desc); + }, + py::return_value_policy::reference) .def("append_block", - [](ProgramDesc &self, BlockDesc &parent) { - auto desc = self.add_blocks(); - 
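
// Taken together, the classes above buffer edits on the C++ side and only write
// them back into the underlying protobuf messages on demand.  A rough sketch of
// the intended flow, using only the methods defined in this patch:
//
//   ProgramDescBind &prog = ProgramDescBind::Instance(&GetProgramDesc());
//   BlockDescBind *root  = prog.Root();
//   BlockDescBind *child = prog.AppendBlock(root);  // new BlockDesc, parent_idx set
//   child->AppendOp();                  // buffered in ops_, not yet in the proto
//   ProgramDesc *proto = prog.Proto();  // Proto() runs Sync() on every block
//   proto->DebugString();               // now reflects the buffered ops
//
// Python sees these objects through py::return_value_policy::reference, so the
// C++ side keeps ownership of every wrapper it hands out.
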
desc->set_idx(self.mutable_blocks()->size() - 1); - desc->set_parent_idx(parent.idx()); - return desc; - }, + &ProgramDescBind::AppendBlock, py::return_value_policy::reference) .def("root_block", - [](ProgramDesc &self) { return self.mutable_blocks()->Mutable(0); }, + &ProgramDescBind::Root, py::return_value_policy::reference) - .def("block", - [](ProgramDesc &self, int id) { return self.blocks(id); }, - py::return_value_policy::reference) - .def("__str__", [](ProgramDesc &self) { return self.DebugString(); }); + .def("block", &ProgramDescBind::Block, py::return_value_policy::reference) + .def("__str__", &ProgramDescBind::DebugString) + .def("num_blocks", &ProgramDescBind::Size); } void BindBlockDesc(py::module &m) { using namespace paddle::framework; // NOLINT - py::class_(m, "BlockDesc", "") - .def("id", [](BlockDesc &self) { return self.idx(); }) - .def("parent", [](BlockDesc &self) { return self.parent_idx(); }) + py::class_(m, "BlockDesc", "") + .def_property_readonly("id", &BlockDescBind::id) + .def_property_readonly("parent", &BlockDescBind::Parent) .def("append_op", - [](BlockDesc &self) { return self.add_ops(); }, + &BlockDescBind::AppendOp, py::return_value_policy::reference) .def("new_var", [](BlockDesc &self) { return self.add_vars(); }, @@ -82,73 +194,76 @@ void BindBlockDesc(py::module &m) { } void BindVarDsec(py::module &m) { - using namespace paddle::framework; // NOLINT - py::class_(m, "VarDesc", "") - .def(py::init<>()) - .def("set_name", - [](VarDesc &self, const std::string &name) { self.set_name(name); }) - .def("set_shape", - [](VarDesc &self, const std::vector &dims) { - VectorToRepeated(dims, self.mutable_lod_tensor()->mutable_dims()); - }) - .def("set_data_type", - [](VarDesc &self, int type_id) { - LoDTensorDesc *lod_tensor_desc = self.mutable_lod_tensor(); - lod_tensor_desc->set_data_type(static_cast(type_id)); - }) - .def("shape", [](VarDesc &self) { - const LoDTensorDesc &lod_tensor_desc = self.lod_tensor(); - return RepeatedToVector(lod_tensor_desc.dims()); - }); + py::class_(m, "VarDesc", ""); + // using namespace paddle::framework; // NOLINT + // py::class_(m, "VarDesc", "") + // .def(py::init<>()) + // .def("set_name", + // [](VarDesc &self, const std::string &name) { self.set_name(name); + // }) + // .def("set_shape", + // [](VarDesc &self, const std::vector &dims) { + // VectorToRepeated(dims, + // self.mutable_lod_tensor()->mutable_dims()); + // }) + // .def("set_data_type", + // [](VarDesc &self, int type_id) { + // LoDTensorDesc *lod_tensor_desc = self.mutable_lod_tensor(); + // lod_tensor_desc->set_data_type(static_cast(type_id)); + // }) + // .def("shape", [](VarDesc &self) { + // const LoDTensorDesc &lod_tensor_desc = self.lod_tensor(); + // return RepeatedToVector(lod_tensor_desc.dims()); + // }); } void BindOpDesc(py::module &m) { - using namespace paddle::framework; // NOLINT - auto op_desc_set_var = [](OpDesc::Var *var, - const std::string ¶meter, - const std::vector &arguments) { - var->set_parameter(parameter); - VectorToRepeated(arguments, var->mutable_arguments()); - }; - - auto op_desc_set_attr = [](OpDesc &desc, const std::string &name) { - auto attr = desc.add_attrs(); - attr->set_name(name); - return attr; - }; - - py::class_(m, "OpDesc", "") - .def("type", [](OpDesc &op) { return op.type(); }) - .def("set_input", - [op_desc_set_var](OpDesc &self, - const std::string ¶meter, - const std::vector &arguments) { - auto ipt = self.add_inputs(); - op_desc_set_var(ipt, parameter, arguments); - }) - .def("input_names", - [](OpDesc &self) { - 
std::vector ret_val; - ret_val.reserve(static_cast(self.inputs().size())); - std::transform( - self.inputs().begin(), - self.inputs().end(), - std::back_inserter(ret_val), - [](const OpDesc::Var &var) { return var.parameter(); }); - return ret_val; - }) - .def("__str__", [](OpDesc &self) { return self.DebugString(); }) - .def("set_output", - [op_desc_set_var](OpDesc &self, - const std::string ¶meter, - const std::vector &arguments) { - auto opt = self.add_outputs(); - op_desc_set_var(opt, parameter, arguments); - }) - .def("set_attr", - [op_desc_set_attr](OpDesc &self, const std::string &name, int i) { - op_desc_set_attr(self, name)->set_i(i); - }); + // auto op_desc_set_var = [](OpDesc::Var *var, + // const std::string ¶meter, + // const std::vector &arguments) { + // var->set_parameter(parameter); + // VectorToRepeated(arguments, var->mutable_arguments()); + // }; + // + // auto op_desc_set_attr = [](OpDesc &desc, const std::string &name) { + // auto attr = desc.add_attrs(); + // attr->set_name(name); + // return attr; + // }; + py::class_(m, "OpDesc", ""); + + // .def("type", [](OpDesc &op) { return op.type(); }) + // .def("set_input", + // [op_desc_set_var](OpDesc &self, + // const std::string ¶meter, + // const std::vector &arguments) { + // auto ipt = self.add_inputs(); + // op_desc_set_var(ipt, parameter, arguments); + // }) + // .def("input_names", + // [](OpDesc &self) { + // std::vector ret_val; + // ret_val.reserve(static_cast(self.inputs().size())); + // std::transform( + // self.inputs().begin(), + // self.inputs().end(), + // std::back_inserter(ret_val), + // [](const OpDesc::Var &var) { return var.parameter(); }); + // return ret_val; + // }) + // .def("__str__", [](OpDesc &self) { return self.DebugString(); }) + // .def("set_output", + // [op_desc_set_var](OpDesc &self, + // const std::string ¶meter, + // const std::vector &arguments) { + // auto opt = self.add_outputs(); + // op_desc_set_var(opt, parameter, arguments); + // }) + // .def("set_attr", + // [op_desc_set_attr](OpDesc &self, const std::string &name, int i) + // { + // op_desc_set_attr(self, name)->set_i(i); + // }); } } // namespace pybind } // namespace paddle diff --git a/python/paddle/v2/framework/tests/test_protobuf_descs.py b/python/paddle/v2/framework/tests/test_protobuf_descs.py index d0192814ef..b5ff2d4c36 100644 --- a/python/paddle/v2/framework/tests/test_protobuf_descs.py +++ b/python/paddle/v2/framework/tests/test_protobuf_descs.py @@ -9,21 +9,28 @@ class TestProgramDesc(unittest.TestCase): del program_desc program_desc = core.ProgramDesc.instance() self.assertIsNotNone(program_desc) - self.assertIsNotNone(program_desc.root_block()) + self.assertIsNotNone(program_desc.block(0)) del program_desc def test_append_block(self): prog_desc = core.ProgramDesc.__create_program_desc__() self.assertIsNotNone(prog_desc) - block_root = prog_desc.root_block() - self.assertEqual(block_root.id(), 0) + block_root = prog_desc.block(0) + self.assertIsNotNone(block_root) + print 'here' + self.assertEqual(block_root.id, 0) block1 = prog_desc.append_block(block_root) block2 = prog_desc.append_block(block1) - self.assertEqual(block1.id(), block2.parent()) - self.assertEqual(block_root.id(), block1.parent()) + self.assertIsNotNone(block1) + print 'here' + self.assertEqual(block1.id, block2.parent) + print 'here' + self.assertEqual(block_root.id, block1.parent) + print 'here' block3 = prog_desc.append_block(block_root) - self.assertEqual(block3.parent(), block_root.id()) - self.assertEqual(prog_desc.block(1).id(), 1) + 
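
# Taken together, the assertions in this test pin down the ids and parent links
# that append_block records; for the calls above the expected layout is roughly:
#
#   block(0) / block_root   id 0, parent -1   (set up by __create_program_desc__)
#   block1                  id 1, parent 0
#   block2                  id 2, parent 1
#   block3                  id 3, parent 0
#
# which is why the test can also assert block(1).id == 1 and num_blocks() == 4.
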
self.assertEqual(block3.parent, block_root.id) + self.assertEqual(prog_desc.block(1).id, 1) + self.assertEqual(4, prog_desc.num_blocks()) class TestVarDesc(unittest.TestCase): From dc643a33523b48ace8e05dcfe0167b21d3687631 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 22 Sep 2017 15:42:14 -0700 Subject: [PATCH 21/51] Hot fix unittest --- paddle/pybind/protobuf.cc | 17 ++++------------- .../v2/framework/tests/test_protobuf_descs.py | 4 ---- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index b4ed9c4335..0fb78bf7a2 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -59,14 +59,7 @@ public: BlockDescBind(ProgramDescBind *prog, BlockDesc *desc) : prog_(prog), desc_(desc), need_update_(false) {} - ~BlockDescBind() { - std::cerr << "dtor " << this << "," << desc_ << std::endl; - } - - int32_t id() const { - std::cerr << "desc ptr " << desc_ << std::endl; - return desc_->idx(); - } + int32_t id() const { return desc_->idx(); } int32_t Parent() const { return desc_->parent_idx(); } @@ -114,11 +107,9 @@ public: return *ptr; } - BlockDescBind *AppendBlock(BlockDescBind *parent) { + BlockDescBind *AppendBlock(const BlockDescBind &parent) { auto *b = prog_->add_blocks(); - std::cerr << "block ptr " << b << std::endl; - std::cerr << "pass ptr " << parent << std::endl; - b->set_parent_idx(parent->id()); + b->set_parent_idx(parent.id()); b->set_idx(prog_->blocks_size() - 1); blocks_.emplace_back(this, b); return &blocks_.back(); @@ -141,6 +132,7 @@ public: private: explicit ProgramDescBind(ProgramDesc *prog) : prog_(prog) { + blocks_.reserve(100); for (auto &block : *prog->mutable_blocks()) { blocks_.emplace_back(this, &block); } @@ -181,7 +173,6 @@ void BindProgramDesc(py::module &m) { } void BindBlockDesc(py::module &m) { - using namespace paddle::framework; // NOLINT py::class_(m, "BlockDesc", "") .def_property_readonly("id", &BlockDescBind::id) .def_property_readonly("parent", &BlockDescBind::Parent) diff --git a/python/paddle/v2/framework/tests/test_protobuf_descs.py b/python/paddle/v2/framework/tests/test_protobuf_descs.py index b5ff2d4c36..fbe1f7152b 100644 --- a/python/paddle/v2/framework/tests/test_protobuf_descs.py +++ b/python/paddle/v2/framework/tests/test_protobuf_descs.py @@ -17,16 +17,12 @@ class TestProgramDesc(unittest.TestCase): self.assertIsNotNone(prog_desc) block_root = prog_desc.block(0) self.assertIsNotNone(block_root) - print 'here' self.assertEqual(block_root.id, 0) block1 = prog_desc.append_block(block_root) block2 = prog_desc.append_block(block1) self.assertIsNotNone(block1) - print 'here' self.assertEqual(block1.id, block2.parent) - print 'here' self.assertEqual(block_root.id, block1.parent) - print 'here' block3 = prog_desc.append_block(block_root) self.assertEqual(block3.parent, block_root.id) self.assertEqual(prog_desc.block(1).id, 1) From e05e27a7f71ddb6549e406f0fbc339c789373935 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 22 Sep 2017 16:59:15 -0700 Subject: [PATCH 22/51] Fix bug --- paddle/pybind/protobuf.cc | 64 +++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 0fb78bf7a2..5511841c8b 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -42,15 +42,23 @@ inline void VectorToRepeated(const std::vector &vec, class ProgramDescBind; class OpDescBind; class BlockDescBind; +class VarDescBind; -class OpDescBind { +class VarDescBind { public: - 
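
// One reason the previous patch reserved space for 100 blocks, and a likely
// motivation for moving storage to heap-allocated elements here: AppendBlock()
// returns a raw pointer into blocks_, so any reallocation of the container
// would invalidate every handle already handed to Python.  A compressed
// illustration of that hazard (generic, not Paddle code):
//
//   std::vector<int> items{0};
//   int *handle = &items[0];                 // like the pointer Python keeps
//   for (int i = 0; i < 200; ++i) items.push_back(i);
//   // *handle is now dangling: the vector has almost certainly reallocated.
//
// Holding std::unique_ptr elements instead (as this patch does) keeps each
// wrapper at a stable address however much the containers grow.
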
explicit OpDescBind(BlockDescBind *block) : block_(block) {} + explicit VarDescBind(const std::string &name) { var_desc_.set_name(name); } + + VarDesc *Proto() { return &var_desc_; } + +private: + VarDesc var_desc_; +}; - operator OpDesc *() { return &op_desc_; } +class OpDescBind { +public: + OpDesc *Proto() { return &op_desc_; } private: - BlockDescBind *block_; OpDesc op_desc_; }; @@ -59,14 +67,28 @@ public: BlockDescBind(ProgramDescBind *prog, BlockDesc *desc) : prog_(prog), desc_(desc), need_update_(false) {} + BlockDescBind(const BlockDescBind &o) = delete; + BlockDescBind &operator=(const BlockDescBind &o) = delete; + int32_t id() const { return desc_->idx(); } int32_t Parent() const { return desc_->parent_idx(); } + VarDescBind *NewVar(const std::string &name) { + need_update_ = true; + auto it = vars_.find(name); + PADDLE_ENFORCE(it == vars_.end(), "Duplicated variable %s", name); + auto var = new VarDescBind(name); + vars_[name].reset(var); + return var; + } + + BlockDescBind *ParentBlock() const; + OpDescBind *AppendOp() { need_update_ = true; - ops_.emplace_back(this); - return &ops_.back(); + ops_.emplace_back(new OpDescBind()); + return ops_.back().get(); } void Sync() { @@ -75,8 +97,9 @@ public: op_field.Clear(); op_field.Reserve(static_cast(ops_.size())); for (auto &op_desc : ops_) { - op_field.AddAllocated(op_desc); + op_field.AddAllocated(op_desc->Proto()); } + need_update_ = false; } } @@ -85,7 +108,8 @@ private: BlockDesc *desc_; // not_own bool need_update_; - std::deque ops_; + std::deque> ops_; + std::unordered_map> vars_; }; using ProgDescMap = @@ -106,18 +130,20 @@ public: } return *ptr; } + ProgramDescBind(const ProgramDescBind &o) = delete; + ProgramDescBind &operator=(const ProgramDescBind &o) = delete; BlockDescBind *AppendBlock(const BlockDescBind &parent) { auto *b = prog_->add_blocks(); b->set_parent_idx(parent.id()); b->set_idx(prog_->blocks_size() - 1); - blocks_.emplace_back(this, b); - return &blocks_.back(); + blocks_.emplace_back(new BlockDescBind(this, b)); + return blocks_.back().get(); } - BlockDescBind *Root() { return &blocks_.front(); } + BlockDescBind *Root() { return blocks_.front().get(); } - BlockDescBind *Block(size_t idx) { return &blocks_[idx]; } + BlockDescBind *Block(size_t idx) { return blocks_[idx].get(); } std::string DebugString() { return Proto()->DebugString(); } @@ -125,25 +151,31 @@ public: ProgramDesc *Proto() { for (auto &block : blocks_) { - block.Sync(); + block->Sync(); } return prog_; } private: explicit ProgramDescBind(ProgramDesc *prog) : prog_(prog) { - blocks_.reserve(100); for (auto &block : *prog->mutable_blocks()) { - blocks_.emplace_back(this, &block); + blocks_.emplace_back(new BlockDescBind(this, &block)); } } // Not owned ProgramDesc *prog_; - std::vector blocks_; + std::vector> blocks_; }; +BlockDescBind *BlockDescBind::ParentBlock() const { + if (this->desc_->parent_idx() == -1) { + return nullptr; + } + return prog_->Block(static_cast(this->desc_->parent_idx())); +} + void BindProgramDesc(py::module &m) { py::class_(m, "ProgramDesc", "") .def_static("instance", From eeb7c8ad795d6d7159d3659a2d41709653e2e347 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 22 Sep 2017 17:34:47 -0700 Subject: [PATCH 23/51] Compelete VarDescBind --- paddle/pybind/protobuf.cc | 44 ++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 5511841c8b..126c2ce1c7 100644 --- a/paddle/pybind/protobuf.cc +++ 
b/paddle/pybind/protobuf.cc @@ -46,12 +46,24 @@ class VarDescBind; class VarDescBind { public: - explicit VarDescBind(const std::string &name) { var_desc_.set_name(name); } + explicit VarDescBind(const std::string &name) { desc_.set_name(name); } - VarDesc *Proto() { return &var_desc_; } + VarDesc *Proto() { return &desc_; } + + void SetShape(const vector &dims) { + VectorToRepeated(dims, desc_.mutable_lod_tensor()->mutable_dims()); + } + + void SetDataType(int type_id) { + desc_.mutable_lod_tensor()->set_data_type(const_cast(type_id)); + } + + std::vector Shape() { + return RepeatedToVector(desc_.lod_tensor().dims()); + } private: - VarDesc var_desc_; + VarDesc desc_; }; class OpDescBind { @@ -217,27 +229,11 @@ void BindBlockDesc(py::module &m) { } void BindVarDsec(py::module &m) { - py::class_(m, "VarDesc", ""); - // using namespace paddle::framework; // NOLINT - // py::class_(m, "VarDesc", "") - // .def(py::init<>()) - // .def("set_name", - // [](VarDesc &self, const std::string &name) { self.set_name(name); - // }) - // .def("set_shape", - // [](VarDesc &self, const std::vector &dims) { - // VectorToRepeated(dims, - // self.mutable_lod_tensor()->mutable_dims()); - // }) - // .def("set_data_type", - // [](VarDesc &self, int type_id) { - // LoDTensorDesc *lod_tensor_desc = self.mutable_lod_tensor(); - // lod_tensor_desc->set_data_type(static_cast(type_id)); - // }) - // .def("shape", [](VarDesc &self) { - // const LoDTensorDesc &lod_tensor_desc = self.lod_tensor(); - // return RepeatedToVector(lod_tensor_desc.dims()); - // }); + py::class_(m, "VarDesc", "") + .def(py::init<>()) + .def("set_shape", VarDescBind::SetShape) + .def("set_data_type", VarDescBind::SetDataType) + .def("shape", VarDescBind::Shape); } void BindOpDesc(py::module &m) { From ddf2448484cb6d183032e8d616ed51176dea9ded Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 22 Sep 2017 17:46:48 -0700 Subject: [PATCH 24/51] Update Input/Output of Op --- paddle/pybind/protobuf.cc | 145 +++++++++++------- .../v2/framework/tests/test_protobuf_descs.py | 19 +++ 2 files changed, 112 insertions(+), 52 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 5511841c8b..67d6252af8 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -14,6 +14,7 @@ limitations under the License. 
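
The three accessors above (set_shape / set_data_type / shape) are what TestVarDesc drives from Python; once new_var returns this wrapper rather than a raw VarDesc (see the "Fix bugs" patch further down), the round trip looks roughly like:

    block = core.ProgramDesc.instance().block(0)
    var = block.new_var('my_var')          # named new_var lands in a later patch
    var.set_shape([3, 2, 10, 8])
    var.set_data_type(0)                   # any valid framework DataType value
    assert var.shape() == [3, 2, 10, 8]
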
*/ #include "paddle/pybind/protobuf.h" #include +#include "paddle/framework/attribute.h" namespace paddle { namespace pybind { @@ -56,10 +57,90 @@ private: class OpDescBind { public: - OpDesc *Proto() { return &op_desc_; } + OpDesc *Proto() { + Sync(); + return &op_desc_; + } + + std::string Type() const { return op_desc_.type(); } + + void SetType(const std::string &type) { op_desc_.set_type(type); } + + const std::vector &Input(const std::string &name) const { + auto it = inputs_.find(name); + PADDLE_ENFORCE( + it != inputs_.end(), "Input %s cannot be found in Op %s", name, Type()); + return it->second; + } + + std::vector InputNames() const { + std::vector retv; + retv.reserve(this->inputs_.size()); + for (auto &ipt : this->inputs_) { + retv.push_back(ipt.first); + } + return retv; + } + + void SetInput(const std::string ¶m_name, + const std::vector &args) { + need_update_ = true; + inputs_[param_name] = args; + } + + const std::vector &Output(const std::string &name) const { + auto it = outputs_.find(name); + PADDLE_ENFORCE(it != outputs_.end(), + "Output %s cannot be found in Op %s", + name, + Type()); + return it->second; + } + + std::vector OutputNames() const { + std::vector retv; + retv.reserve(this->outputs_.size()); + for (auto &ipt : this->outputs_) { + retv.push_back(ipt.first); + } + return retv; + } + + void SetOutput(const std::string ¶m_name, + const std::vector &args) { + need_update_ = true; + this->outputs_[param_name] = args; + } + + std::string DebugString() { return this->Proto()->DebugString(); } + + void Sync() { + if (need_update_) { + this->op_desc_.mutable_inputs()->Clear(); + for (auto &ipt : inputs_) { + auto *input = op_desc_.add_inputs(); + input->set_parameter(ipt.first); + VectorToRepeated(ipt.second, input->mutable_arguments()); + } + + this->op_desc_.mutable_outputs()->Clear(); + for (auto &opt : outputs_) { + auto *output = op_desc_.add_outputs(); + output->set_parameter(opt.first); + VectorToRepeated(opt.second, output->mutable_arguments()); + } + + need_update_ = false; + } + } private: OpDesc op_desc_; + std::unordered_map> inputs_; + std::unordered_map> outputs_; + std::unordered_map attrs_; + + bool need_update_{false}; }; class BlockDescBind { @@ -141,8 +222,6 @@ public: return blocks_.back().get(); } - BlockDescBind *Root() { return blocks_.front().get(); } - BlockDescBind *Block(size_t idx) { return blocks_[idx].get(); } std::string DebugString() { return Proto()->DebugString(); } @@ -196,9 +275,6 @@ void BindProgramDesc(py::module &m) { .def("append_block", &ProgramDescBind::AppendBlock, py::return_value_policy::reference) - .def("root_block", - &ProgramDescBind::Root, - py::return_value_policy::reference) .def("block", &ProgramDescBind::Block, py::return_value_policy::reference) .def("__str__", &ProgramDescBind::DebugString) .def("num_blocks", &ProgramDescBind::Size); @@ -241,52 +317,17 @@ void BindVarDsec(py::module &m) { } void BindOpDesc(py::module &m) { - // auto op_desc_set_var = [](OpDesc::Var *var, - // const std::string ¶meter, - // const std::vector &arguments) { - // var->set_parameter(parameter); - // VectorToRepeated(arguments, var->mutable_arguments()); - // }; - // - // auto op_desc_set_attr = [](OpDesc &desc, const std::string &name) { - // auto attr = desc.add_attrs(); - // attr->set_name(name); - // return attr; - // }; - py::class_(m, "OpDesc", ""); - - // .def("type", [](OpDesc &op) { return op.type(); }) - // .def("set_input", - // [op_desc_set_var](OpDesc &self, - // const std::string ¶meter, - // const std::vector 
&arguments) { - // auto ipt = self.add_inputs(); - // op_desc_set_var(ipt, parameter, arguments); - // }) - // .def("input_names", - // [](OpDesc &self) { - // std::vector ret_val; - // ret_val.reserve(static_cast(self.inputs().size())); - // std::transform( - // self.inputs().begin(), - // self.inputs().end(), - // std::back_inserter(ret_val), - // [](const OpDesc::Var &var) { return var.parameter(); }); - // return ret_val; - // }) - // .def("__str__", [](OpDesc &self) { return self.DebugString(); }) - // .def("set_output", - // [op_desc_set_var](OpDesc &self, - // const std::string ¶meter, - // const std::vector &arguments) { - // auto opt = self.add_outputs(); - // op_desc_set_var(opt, parameter, arguments); - // }) - // .def("set_attr", - // [op_desc_set_attr](OpDesc &self, const std::string &name, int i) - // { - // op_desc_set_attr(self, name)->set_i(i); - // }); + py::class_(m, "OpDesc", "") + .def("type", &OpDescBind::Type) + .def("set_type", &OpDescBind::SetType) + .def("input", &OpDescBind::Input) + .def("input_names", &OpDescBind::InputNames) + .def("set_input", &OpDescBind::SetInput) + .def("output", &OpDescBind::Output) + .def("output_names", &OpDescBind::OutputNames) + .def("set_output", &OpDescBind::SetOutput) + .def("__str__", &OpDescBind::DebugString) + .def("__repr__", &OpDescBind::DebugString); } } // namespace pybind } // namespace paddle diff --git a/python/paddle/v2/framework/tests/test_protobuf_descs.py b/python/paddle/v2/framework/tests/test_protobuf_descs.py index fbe1f7152b..950a936307 100644 --- a/python/paddle/v2/framework/tests/test_protobuf_descs.py +++ b/python/paddle/v2/framework/tests/test_protobuf_descs.py @@ -2,6 +2,25 @@ import unittest import paddle.v2.framework.core as core +class TestOpDesc(unittest.TestCase): + def test_op_desc(self): + prog = core.ProgramDesc.__create_program_desc__() + self.assertIsNotNone(prog) + block = prog.block(0) + self.assertIsNotNone(block) + op = block.append_op() + self.assertIsNotNone(op) + op.set_type("test") + self.assertEqual("test", op.type()) + op.set_input("X", ["a", "b", "c"]) + self.assertEqual(["a", "b", "c"], op.input("X")) + self.assertEqual(["X"], op.input_names()) + + op.set_output("Out", ["z"]) + self.assertEqual(['z'], op.output("Out")) + self.assertEqual(["Out"], op.output_names()) + + class TestProgramDesc(unittest.TestCase): def test_instance(self): program_desc = core.ProgramDesc.instance() From 08e99006216395ee61f3dad3047dd44316829a66 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 22 Sep 2017 18:29:25 -0700 Subject: [PATCH 25/51] Fix bugs --- paddle/pybind/protobuf.cc | 18 +++++++++--------- .../v2/framework/tests/test_protobuf_descs.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 126c2ce1c7..de6db60730 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -50,12 +50,12 @@ public: VarDesc *Proto() { return &desc_; } - void SetShape(const vector &dims) { + void SetShape(const std::vector &dims) { VectorToRepeated(dims, desc_.mutable_lod_tensor()->mutable_dims()); } void SetDataType(int type_id) { - desc_.mutable_lod_tensor()->set_data_type(const_cast(type_id)); + desc_.mutable_lod_tensor()->set_data_type(static_cast(type_id)); } std::vector Shape() { @@ -86,7 +86,8 @@ public: int32_t Parent() const { return desc_->parent_idx(); } - VarDescBind *NewVar(const std::string &name) { + VarDescBind *NewVar(py::bytes name_bytes) { + std::string name = name_bytes; need_update_ = true; auto it = 
vars_.find(name); PADDLE_ENFORCE(it == vars_.end(), "Duplicated variable %s", name); @@ -224,16 +225,15 @@ void BindBlockDesc(py::module &m) { &BlockDescBind::AppendOp, py::return_value_policy::reference) .def("new_var", - [](BlockDesc &self) { return self.add_vars(); }, + &BlockDescBind::NewVar, py::return_value_policy::reference); } void BindVarDsec(py::module &m) { - py::class_(m, "VarDesc", "") - .def(py::init<>()) - .def("set_shape", VarDescBind::SetShape) - .def("set_data_type", VarDescBind::SetDataType) - .def("shape", VarDescBind::Shape); + py::class_(m, "VarDesc", "") + .def("set_shape", &VarDescBind::SetShape) + .def("set_data_type", &VarDescBind::SetDataType) + .def("shape", &VarDescBind::Shape); } void BindOpDesc(py::module &m) { diff --git a/python/paddle/v2/framework/tests/test_protobuf_descs.py b/python/paddle/v2/framework/tests/test_protobuf_descs.py index fbe1f7152b..f1074f6bb5 100644 --- a/python/paddle/v2/framework/tests/test_protobuf_descs.py +++ b/python/paddle/v2/framework/tests/test_protobuf_descs.py @@ -33,7 +33,7 @@ class TestVarDesc(unittest.TestCase): def test_shape(self): program_desc = core.ProgramDesc.instance() block = program_desc.root_block() - var = block.new_var() + var = block.new_var('my_var') src_shape = [3, 2, 10, 8] var.set_shape(src_shape) res_shape = var.shape() From afeb01f7620a2280c946f359065589d8b0d59062 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 22 Sep 2017 18:30:05 -0700 Subject: [PATCH 26/51] Stash --- paddle/pybind/protobuf.cc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 67d6252af8..345bb02c86 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -114,6 +114,14 @@ public: std::string DebugString() { return this->Proto()->DebugString(); } + struct SetAttrDescVisitor : public boost::static_visitor { + explicit SetAttrDescVisitor(OpDesc::Attr *attr) : attr_(attr) {} + OpDesc::Attr *attr_; + void operator()(int v) { attr_->set_i(v); } + void operator()(float v) { attr_->set_f(v); } + void operator()(const std::string &v) { attr_->set_s(v); } + }; + void Sync() { if (need_update_) { this->op_desc_.mutable_inputs()->Clear(); @@ -130,6 +138,13 @@ public: VectorToRepeated(opt.second, output->mutable_arguments()); } + this->op_desc_.mutable_attrs()->Clear(); + for (auto &attr : attrs_) { + auto *attr_desc = op_desc_.add_attrs(); + attr_desc->set_name(attr.first); + attr_desc->set_type(static_cast(attr.second.which() - 1)); + } + need_update_ = false; } } From fdd68fd1a17a78ead01911a8685f02ea871398a5 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 25 Sep 2017 10:14:38 -0700 Subject: [PATCH 27/51] Refine Visitor --- paddle/pybind/protobuf.cc | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 23c322ac36..3b2ac68714 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -128,10 +128,24 @@ public: struct SetAttrDescVisitor : public boost::static_visitor { explicit SetAttrDescVisitor(OpDesc::Attr *attr) : attr_(attr) {} - OpDesc::Attr *attr_; - void operator()(int v) { attr_->set_i(v); } - void operator()(float v) { attr_->set_f(v); } - void operator()(const std::string &v) { attr_->set_s(v); } + mutable OpDesc::Attr *attr_; + void operator()(int v) const { attr_->set_i(v); } + void operator()(float v) const { attr_->set_f(v); } + void operator()(const std::string &v) const { attr_->set_s(v); } + void operator()(bool b) const { 
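
// A static_visitor like the one above is driven with boost::apply_visitor; a
// rough sketch of how Sync() would use it when serializing attrs_ (assuming
// attrs_ maps attribute names to the framework Attribute variant, as the
// which() - 1 conversion in the earlier Sync() suggests):
//
//   for (auto &attr : attrs_) {
//     auto *attr_desc = op_desc_.add_attrs();
//     attr_desc->set_name(attr.first);
//     attr_desc->set_type(static_cast<framework::AttrType>(attr.second.which() - 1));
//     boost::apply_visitor(SetAttrDescVisitor(attr_desc), attr.second);
//   }
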
attr_->set_b(b); } + + void operator()(const std::vector &v) const { + VectorToRepeated(v, attr_->mutable_ints()); + } + void operator()(const std::vector &v) const { + VectorToRepeated(v, attr_->mutable_floats()); + } + void operator()(const std::vector &v) const { + VectorToRepeated(v, attr_->mutable_strings()); + } + void operator()(const std::vector &v) const { + VectorToRepeated(v, attr_->mutable_bools()); + } }; void Sync() { From 699dbe3be9fd3020fe44d1401f5d1a492d98e40b Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 25 Sep 2017 11:38:16 -0700 Subject: [PATCH 28/51] Use `bool` for PADDLE_ENFORCE, not int * If stat is an integer, bool value will implicit cast to int before pass to PADDLE_ENFORCE --- paddle/platform/enforce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index df5f71ed76..b523ef03c0 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -107,7 +107,7 @@ struct EnforceNotMet : public std::exception { template inline typename std::enable_if::type throw_on_error( - int stat, const Args&... args) { + bool stat, const Args&... args) { if (UNLIKELY(!(stat))) { throw std::runtime_error(string::Sprintf(args...)); } From 1bfa0e130a17613c0b0e2108e7d220d4630e928a Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 25 Sep 2017 13:48:13 -0700 Subject: [PATCH 29/51] Add `Prepend` for BlockDescBind --- paddle/pybind/protobuf.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index de6db60730..f119a12e98 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -104,6 +104,12 @@ public: return ops_.back().get(); } + OpDescBind *PrependOp() { + need_update_ = true; + ops_.emplace_front(new OpDescBind()); + return ops_.front().get(); + } + void Sync() { if (need_update_) { auto &op_field = *this->desc_->mutable_ops(); @@ -224,6 +230,9 @@ void BindBlockDesc(py::module &m) { .def("append_op", &BlockDescBind::AppendOp, py::return_value_policy::reference) + .def("prepend_op", + &BlockDescBind::PrependOp, + py::return_value_policy::reference) .def("new_var", &BlockDescBind::NewVar, py::return_value_policy::reference); From f9f910a33bb33fd94fd645743518a4711a7e0017 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 25 Sep 2017 16:41:18 -0700 Subject: [PATCH 30/51] Complete op --- paddle/pybind/protobuf.cc | 150 +++++++++++++++++- .../v2/framework/tests/test_protobuf_descs.py | 36 ++++- 2 files changed, 181 insertions(+), 5 deletions(-) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 7af93cca99..673e0ab80b 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -14,8 +14,72 @@ limitations under the License. */ #include "paddle/pybind/protobuf.h" #include +#include #include "paddle/framework/attribute.h" +// Cast boost::variant for PyBind. +// Copy from +// https://github.com/pybind/pybind11/issues/576#issuecomment-269563199 +namespace pybind11 { +namespace detail { + +// Can be replaced by a generic lambda in C++14 +struct variant_caster_visitor : public boost::static_visitor { + return_value_policy policy; + handle parent; + + variant_caster_visitor(return_value_policy policy, handle parent) + : policy(policy), parent(parent) {} + + template + handle operator()(T const &src) const { + return make_caster::cast(src, policy, parent); + } +}; + +template +struct variant_caster; + +template
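
// The enforce change above matters for checks like the one in NewVar(): with the
// parameter declared as int, a boolean condition such as it == vars_.end() was
// first promoted to int and then tested; declaring it bool keeps the condition a
// plain bool all the way through.  A stripped-down view of the new shape
// (paraphrased, not the full Paddle header):
//
//   template <typename... Args>
//   inline void throw_on_error(bool stat, const Args &... args) {
//     if (UNLIKELY(!(stat))) {
//       throw std::runtime_error(string::Sprintf(args...));
//     }
//   }
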