From 5ad9474bf7d2ad94578bd509957ae331cde36ab0 Mon Sep 17 00:00:00 2001
From: dongzhihong
Date: Mon, 24 Jul 2017 10:36:10 +0800
Subject: [PATCH 01/51] add random op

---
 paddle/operators/CMakeLists.txt |  1 +
 paddle/operators/random_op.cc   | 46 +++++++++++++++++++++++++++++++++
 paddle/operators/random_op.cu   |  6 +++++
 paddle/operators/random_op.h    | 29 +++++++++++++++++++++
 4 files changed, 82 insertions(+)
 create mode 100644 paddle/operators/random_op.cc
 create mode 100644 paddle/operators/random_op.cu
 create mode 100644 paddle/operators/random_op.h

diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index a37720e509..14f8303c40 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -48,6 +48,7 @@ op_library(mul_op SRCS mul_op.cc mul_op.cu)
 op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
 op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc)
 op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
+op_library(random_op SRCS random_op.cc random_op.cu)
 op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op
         softmax_op net)

diff --git a/paddle/operators/random_op.cc b/paddle/operators/random_op.cc
new file mode 100644
index 0000000000..c219a0b67d
--- /dev/null
+++ b/paddle/operators/random_op.cc
@@ -0,0 +1,46 @@
+#include "paddle/operators/random_op.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+class RandomOp : public framework::OperatorWithKernel {
+protected:
+  void InferShape(
+      const std::vector<const framework::Tensor*>& inputs,
+      const std::vector<framework::Tensor*>& outputs) const override {
+    PADDLE_ENFORCE(inputs.size() == 0, "Input size of RandomOp must be zero.");
+    PADDLE_ENFORCE(outputs.size() == 1, "Output size of RandomOp must be one.");
+    PADDLE_ENFORCE(inputs[0] != nullptr && outputs[0] != nullptr,
+                   "Inputs/Outputs of RandomOp must all be set.");
+    outputs[0]->set_dims(inputs[0]->dims());
+  }
+};
+
+class RandomOpMaker : public framework::OpProtoAndCheckerMaker {
+public:
+  RandomOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+    AddAttr<std::vector<int>>("Shape", "The shape of matrix to be randomized");
+    AddAttr<float>("seed", "random seed generator.").SetDefault(1337);
+    AddAttr<float>("mean", "mean value of random.").SetDefault(.0);
+    AddAttr<float>("std", "minimum value of random value")
+        .SetDefault(1.0)
+        .LargerThan(.0);
+    AddOutput("Out", "output matrix of random op");
+    AddComment(R"DOC(
+Random Operator fill a matrix in normal distribution.
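+For example, with Shape=[2, 3], mean=0.0 and std=1.0, every entry of the 2x3
+output Out is drawn independently from N(0, 1).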
+The eqution : Out = Random(Shape=(d0, d1, ...), Dtype, mean, std) +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP(random_op, + paddle::operators::RandomOp, + paddle::operators::RandomOpMaker); + +typedef paddle::operators::RandomOpKernel + RandomOpKernel_CPU_float; +REGISTER_OP_CPU_KERNEL(random_op, RandomOpKernel_CPU_float); diff --git a/paddle/operators/random_op.cu b/paddle/operators/random_op.cu new file mode 100644 index 0000000000..50985f6699 --- /dev/null +++ b/paddle/operators/random_op.cu @@ -0,0 +1,6 @@ +#include "paddle/operators/random_op.h" +#include "paddle/framework/op_registry.h" + +typedef paddle::operators::RandomOpKernel + RandomOpKernel_GPU_float; +REGISTER_OP_GPU_KERNEL(random_op, RandomOpKernel_GPU_float); \ No newline at end of file diff --git a/paddle/operators/random_op.h b/paddle/operators/random_op.h new file mode 100644 index 0000000000..123d9d6ffa --- /dev/null +++ b/paddle/operators/random_op.h @@ -0,0 +1,29 @@ +#pragma once +#include +#include "glog/logging.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { +template +class RandomOpKernel : public framework::OpKernel { +public: + void Compute(const framework::KernelContext& context) const override { + auto* output = context.Output(0)->GetMutable(); + output->mutable_data(context.GetPlace()); + + auto shape = context.op_.attrs_.at("Shape"); + auto mean = context.op_.attrs_.at("mean"); + auto std = context.op_.attrs_.at("std"); + auto seed = context.op_.attrs_.at("seed"); + // std::default_random_engine generator(seed); + // std::normal_distribution distribution(mean, std); + + framework::EigenMatrix::From(*output).device(*( + context.GetEigenDevice())) = framework::EigenMatrix::Random(); + } +}; + +} // namespace operators +} // namespace paddle From 0d554f1dea499e72ce0e0d6c240aac0add23cf49 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 24 Jul 2017 21:01:57 +0800 Subject: [PATCH 02/51] "add template fill function" --- paddle/operators/random_op.cc | 14 +++++++++++- paddle/operators/random_op.cu | 13 ++++++++++++ paddle/operators/random_op.h | 40 +++++++++++++++++++++++++++++------ 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/paddle/operators/random_op.cc b/paddle/operators/random_op.cc index c219a0b67d..b85ff84220 100644 --- a/paddle/operators/random_op.cc +++ b/paddle/operators/random_op.cc @@ -3,6 +3,18 @@ namespace paddle { namespace operators { + +using paddle::platform::GPUPlace; +template +bool Gaussian( + Generator g, T* output, const int size, const T& mean, const T& std) { + std::normal_distribution distribution(mean, std); + for (int i = 0; i < size; ++i) { + output[i] = distribution(g()); + } + return true; +} + class RandomOp : public framework::OperatorWithKernel { protected: void InferShape( @@ -12,7 +24,7 @@ protected: PADDLE_ENFORCE(outputs.size() == 1, "Output size of RandomOp must be one."); PADDLE_ENFORCE(inputs[0] != nullptr && outputs[0] != nullptr, "Inputs/Outputs of RandomOp must all be set."); - outputs[0]->set_dims(inputs[0]->dims()); + outputs[0]->set_dims(context.op_.attrs_.at("shape")); } }; diff --git a/paddle/operators/random_op.cu b/paddle/operators/random_op.cu index 50985f6699..ea1096aeb9 100644 --- a/paddle/operators/random_op.cu +++ b/paddle/operators/random_op.cu @@ -1,6 +1,19 @@ #include "paddle/operators/random_op.h" #include "paddle/framework/op_registry.h" +namespace paddle { +namespace operators { + +using paddle::platform::GPUPlace; 
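+// GPU path: sampling is delegated to cuRAND. curandGenerateNormal(gen, out,
+// n, mean, stddev) fills the device buffer `out` with n floats drawn from
+// N(mean, stddev); `gen` must be a curandGenerator_t handle, and cuRAND
+// requires n to be even for its pseudorandom generators.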
+template +bool Gaussian(Generator g, T* output, const int size, const T& mean, const T& std) { + return curandGenerateNormal(g, output, size, mean, std); +} + +} // operators +} // paddle + + typedef paddle::operators::RandomOpKernel RandomOpKernel_GPU_float; REGISTER_OP_GPU_KERNEL(random_op, RandomOpKernel_GPU_float); \ No newline at end of file diff --git a/paddle/operators/random_op.h b/paddle/operators/random_op.h index 123d9d6ffa..1b5fb16de1 100644 --- a/paddle/operators/random_op.h +++ b/paddle/operators/random_op.h @@ -6,24 +6,52 @@ namespace paddle { namespace operators { +template +bool Gaussian( + Generator g, T* output, const int size, const T& mean, const T& std); + template class RandomOpKernel : public framework::OpKernel { public: void Compute(const framework::KernelContext& context) const override { - auto* output = context.Output(0)->GetMutable(); - output->mutable_data(context.GetPlace()); - - auto shape = context.op_.attrs_.at("Shape"); auto mean = context.op_.attrs_.at("mean"); auto std = context.op_.attrs_.at("std"); auto seed = context.op_.attrs_.at("seed"); + auto* output = context.Output(0)->GetMutable(); + output->mutable_data(context.GetPlace()); + + Gaussian(, output, output->size(), mean, std) : // std::default_random_engine generator(seed); // std::normal_distribution distribution(mean, std); - framework::EigenMatrix::From(*output).device(*( - context.GetEigenDevice())) = framework::EigenMatrix::Random(); + // framework::EigenMatrix::From(*output).device(*( + // context.GetEigenDevice())) = + // framework::EigenMatrix::Random(); } }; +// using paddle::platform::CPUPlace; +// template +// class RandomOpKernel : public framework::OpKernel { +// public: +// void Compute(const framework::KernelContext& context) const override { + +// std::unique_ptr generator(seed); +// for(size_t i=0; i < output->size(); ++i) { +// output[i] = distribution(generator()); +// } +// } + +// }; + +// using paddle::platform::GPUPlace; +// template +// class RandomOpKernel : public framework::OpKernel { +// public: +// void Compute(const framework::KernelContext& context) const override { + +// } +// } + } // namespace operators } // namespace paddle From 6f80b5f1df2b4d77857338f44c3159388602457b Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 25 Jul 2017 12:00:47 +0800 Subject: [PATCH 03/51] "move to template function" --- paddle/operators/random_op.cc | 34 ++++++++++++++++++----- paddle/operators/random_op.cu | 7 ++--- paddle/operators/random_op.h | 28 +++++++++++++------ paddle/platform/device_context.h | 46 ++++++++++++++++++++------------ 4 files changed, 81 insertions(+), 34 deletions(-) diff --git a/paddle/operators/random_op.cc b/paddle/operators/random_op.cc index b85ff84220..a536ee74b4 100644 --- a/paddle/operators/random_op.cc +++ b/paddle/operators/random_op.cc @@ -1,13 +1,33 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + #include "paddle/operators/random_op.h" #include "paddle/framework/op_registry.h" namespace paddle { namespace operators { -using paddle::platform::GPUPlace; -template -bool Gaussian( - Generator g, T* output, const int size, const T& mean, const T& std) { +// using paddle::platform::CPUPlace; +// template +template +bool Gaussian(platform::CPUDeviceContext& ctx, + framework::Tensor* output, + const int size, + const T& mean, + const T& std, + const T& seed) { + auto g = ctx.RandGenerator(seed); std::normal_distribution distribution(mean, std); for (int i = 0; i < size; ++i) { output[i] = distribution(g()); @@ -24,7 +44,9 @@ protected: PADDLE_ENFORCE(outputs.size() == 1, "Output size of RandomOp must be one."); PADDLE_ENFORCE(inputs[0] != nullptr && outputs[0] != nullptr, "Inputs/Outputs of RandomOp must all be set."); - outputs[0]->set_dims(context.op_.attrs_.at("shape")); + outputs[0]->Resize( + framework::make_ddim(this->GetAttr>("shape"))); + // outputs[0]->set_dims(context.op_.attrs_.at("shape")); } }; @@ -32,7 +54,7 @@ class RandomOpMaker : public framework::OpProtoAndCheckerMaker { public: RandomOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddAttr>("Shape", "The shape of matrix to be randomized"); + AddAttr>("shape", "The shape of matrix to be randomized"); AddAttr("seed", "random seed generator.").SetDefault(1337); AddAttr("mean", "mean value of random.").SetDefault(.0); AddAttr("std", "minimum value of random value") diff --git a/paddle/operators/random_op.cu b/paddle/operators/random_op.cu index ea1096aeb9..40b642d8a1 100644 --- a/paddle/operators/random_op.cu +++ b/paddle/operators/random_op.cu @@ -4,9 +4,10 @@ namespace paddle { namespace operators { -using paddle::platform::GPUPlace; -template -bool Gaussian(Generator g, T* output, const int size, const T& mean, const T& std) { +template +bool Gaussian(platform::CUDADeviceContext &ctx, framework::Tensor* output, + const int size, const T& mean, const T& std, const T& seed) { + auto g = RandGenerator(seed); return curandGenerateNormal(g, output, size, mean, std); } diff --git a/paddle/operators/random_op.h b/paddle/operators/random_op.h index 1b5fb16de1..a82b3afec8 100644 --- a/paddle/operators/random_op.h +++ b/paddle/operators/random_op.h @@ -6,21 +6,33 @@ namespace paddle { namespace operators { -template -bool Gaussian( - Generator g, T* output, const int size, const T& mean, const T& std); +template +bool Gaussian(DeviceContext& ctx, + framework::Tensor* output, + const int size, + const T& mean, + const T& std, + const T& seed); template class RandomOpKernel : public framework::OpKernel { public: void Compute(const framework::KernelContext& context) const override { - auto mean = context.op_.attrs_.at("mean"); - auto std = context.op_.attrs_.at("std"); - auto seed = context.op_.attrs_.at("seed"); + auto mean = context.op_.GetAttr("mean"); + auto std = context.op_.GetAttr("std"); + auto seed = context.op_.GetAttr("seed"); auto* output = context.Output(0)->GetMutable(); output->mutable_data(context.GetPlace()); - - Gaussian(, output, output->size(), mean, std) : + Gaussian(context.device_context_, + output, + framework::product(output->dims()), + mean, + std, + seed); + // Gaussian(context.device_context_, + // output, + // framework::product(output->dims()), + // mean, std, seed); // std::default_random_engine generator(seed); // std::normal_distribution distribution(mean, std); diff --git a/paddle/platform/device_context.h 
b/paddle/platform/device_context.h index fe6f13e399..b8af4abd7f 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -39,6 +39,7 @@ class DeviceContext { class CPUDeviceContext : public DeviceContext { public: + typedef std::mt19937 random_generator_type; CPUDeviceContext() { eigen_device_.reset(new Eigen::DefaultDevice()); } Eigen::DefaultDevice* eigen_device() const { return eigen_device_.get(); } @@ -48,7 +49,17 @@ class CPUDeviceContext : public DeviceContext { return retv; } + const random_generator_type& RandGenerator(const int seed) { + if (!rand_generator_) { + random_seed_ = seed; + rand_generator_.reset(new random_generator_type(random_seed_)); + } + return *rand_generator_.get(); + } + private: + int random_seed_; + std::unique_ptr rand_generator_; std::unique_ptr eigen_device_; }; @@ -87,6 +98,24 @@ class CUDADeviceContext : public DeviceContext { "cudaStreamSynchronize failed"); } + const curandGenerator_t RandGenerator(const int seed) { + if (!rand_generator_) { + random_seed_ = seed; + GPUPlaceGuard guard(gpu_place_); + PADDLE_ENFORCE(paddle::platform::dynload::curandCreateGenerator( + &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT), + "curandCreateGenerator failed"); + PADDLE_ENFORCE( + paddle::platform::dynload::curandSetPseudoRandomGeneratorSeed( + rand_generator_, random_seed_), + "curandSetPseudoRandomGeneratorSeed failed"); + PADDLE_ENFORCE( + paddle::platform::dynload::curandSetStream(rand_generator_, stream_), + "curandSetStream failed"); + } + return rand_generator_; + } + cudaStream_t stream() { return stream_; } Eigen::GpuDevice* eigen_device() const { return eigen_device_.get(); } @@ -115,23 +144,6 @@ class CUDADeviceContext : public DeviceContext { return dnn_handle_; } - curandGenerator_t curand_generator() { - if (!rand_generator_) { - GPUPlaceGuard guard(gpu_place_); - PADDLE_ENFORCE(paddle::platform::dynload::curandCreateGenerator( - &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT), - "curandCreateGenerator failed"); - PADDLE_ENFORCE( - paddle::platform::dynload::curandSetPseudoRandomGeneratorSeed( - rand_generator_, random_seed_), - "curandSetPseudoRandomGeneratorSeed failed"); - PADDLE_ENFORCE( - paddle::platform::dynload::curandSetStream(rand_generator_, stream_), - "curandSetStream failed"); - } - return rand_generator_; - } - ~CUDADeviceContext() { Wait(); if (blas_handle_) { From 32c15a291917786ba136b76adb289aaa78527252 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 25 Jul 2017 15:00:24 +0800 Subject: [PATCH 04/51] "random op test" --- paddle/operators/random_op.h | 1 + paddle/pybind/pybind.cc | 1 + .../v2/framework/tests/test_plain_net.py | 12 ++++---- .../v2/framework/tests/test_random_op.py | 28 +++++++++++++++++++ 4 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 python/paddle/v2/framework/tests/test_random_op.py diff --git a/paddle/operators/random_op.h b/paddle/operators/random_op.h index a82b3afec8..bee6cc9cbd 100644 --- a/paddle/operators/random_op.h +++ b/paddle/operators/random_op.h @@ -6,6 +6,7 @@ namespace paddle { namespace operators { + template bool Gaussian(DeviceContext& ctx, framework::Tensor* output, diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index d48a948d21..f6e9013471 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -36,6 +36,7 @@ USE_OP(mul); USE_OP(sigmoid); USE_OP(softmax); USE_OP(rowwise_add); +USE_OP(random_op); template void ExposeOperator(ClassType& m) { diff --git a/python/paddle/v2/framework/tests/test_plain_net.py 
b/python/paddle/v2/framework/tests/test_plain_net.py index 2b919aca28..53c8dd6c22 100644 --- a/python/paddle/v2/framework/tests/test_plain_net.py +++ b/python/paddle/v2/framework/tests/test_plain_net.py @@ -16,13 +16,13 @@ class TestNet(unittest.TestCase): net.complete_add_op(True) expected = ''' -Op(plain_net), inputs:(@EMPTY@, X, Y, w), outputs:(@TEMP@fc@0, Out, fc.out). - Op(add_two), inputs:(X, Y), outputs:(Out). - Op(plain_net), inputs:(@EMPTY@, X, w), outputs:(@TEMP@fc@0, fc.out). + Op(plain_net), inputs:(@EMPTY@, X, Y, w), outputs:(@TEMP@fc@0, Out, fc.out). + Op(add_two), inputs:(X, Y), outputs:(Out). + Op(plain_net), inputs:(@EMPTY@, X, w), outputs:(@TEMP@fc@0, fc.out). Op(fc), inputs:(X, w, @EMPTY@), outputs:(fc.out, @TEMP@fc@0). - Op(mul), inputs:(X, w), outputs:(@TEMP@fc@0). - Op(sigmoid), inputs:(@TEMP@fc@0), outputs:(fc.out). -''' + Op(mul), inputs:(X, w), outputs:(@TEMP@fc@0). + Op(sigmoid), inputs:(@TEMP@fc@0), outputs:(fc.out). + ''' self.assertEqual(expected, "\n" + str(net)) diff --git a/python/paddle/v2/framework/tests/test_random_op.py b/python/paddle/v2/framework/tests/test_random_op.py new file mode 100644 index 0000000000..eb69f35edf --- /dev/null +++ b/python/paddle/v2/framework/tests/test_random_op.py @@ -0,0 +1,28 @@ +import unittest +import paddle.v2.framework.create_op_creation_methods as creation +import paddle.v2.framework.core as core +from op_test_util import OpTestMeta +import numpy + + +class TestRandomOp(unittest.TestCase): + def test_random(self): + scope = core.Scope(None) + # Out = scope.create_var("Out") + op = creation.op_creations.random( + shape=[1000, 1000], mean=5.0, std=1.0, seed=1701, Out="Out") + for out in op.outputs(): + if scope.get_var(out) is None: + scope.create_var(out).get_tensor() + + tensor = scope.get_var("Y").get_tensor() + op.infer_shape(scope) + self.assertEqual([1000, 1000], tensor.shape()) + ctx = core.DeviceContext.cpu_context() + op.run(scope, ctx) + self.assertAlmostEqual(numpy.std(tensor), 1.0) + self.assertAlmostEqual(numpy.mean(tensor), 5.0) + + +if __name__ == '__main__': + unittest.main() From 30a47fe8a321fd92b05fcf71e3668862176d1f91 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 25 Jul 2017 15:19:13 +0800 Subject: [PATCH 05/51] "link pybind11" --- paddle/operators/random_op.cc | 4 ++-- paddle/operators/random_op.cu | 2 +- paddle/pybind/CMakeLists.txt | 2 +- paddle/pybind/pybind.cc | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/paddle/operators/random_op.cc b/paddle/operators/random_op.cc index a536ee74b4..05a3dbd9f4 100644 --- a/paddle/operators/random_op.cc +++ b/paddle/operators/random_op.cc @@ -71,10 +71,10 @@ The eqution : Out = Random(Shape=(d0, d1, ...), Dtype, mean, std) } // namespace operators } // namespace paddle -REGISTER_OP(random_op, +REGISTER_OP(random, paddle::operators::RandomOp, paddle::operators::RandomOpMaker); typedef paddle::operators::RandomOpKernel RandomOpKernel_CPU_float; -REGISTER_OP_CPU_KERNEL(random_op, RandomOpKernel_CPU_float); +REGISTER_OP_CPU_KERNEL(random, RandomOpKernel_CPU_float); diff --git a/paddle/operators/random_op.cu b/paddle/operators/random_op.cu index 40b642d8a1..85054974ac 100644 --- a/paddle/operators/random_op.cu +++ b/paddle/operators/random_op.cu @@ -17,4 +17,4 @@ bool Gaussian(platform::CUDADeviceContext &ctx, framework::Tensor* output, typedef paddle::operators::RandomOpKernel RandomOpKernel_GPU_float; -REGISTER_OP_GPU_KERNEL(random_op, RandomOpKernel_GPU_float); \ No newline at end of file +REGISTER_OP_GPU_KERNEL(random, 
RandomOpKernel_GPU_float); \ No newline at end of file diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index fd1a142b40..8010369b41 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1,2 +1,2 @@ cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python - add_op fc_op sgd_op cross_entropy_op) + add_op fc_op sgd_op cross_entropy_op random_op) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index f6e9013471..1138b07ec4 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -36,7 +36,7 @@ USE_OP(mul); USE_OP(sigmoid); USE_OP(softmax); USE_OP(rowwise_add); -USE_OP(random_op); +USE_OP(random); template void ExposeOperator(ClassType& m) { From 2b3e362136f94a20503cdb1133762e33b18bf6f3 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 25 Jul 2017 15:50:51 +0800 Subject: [PATCH 06/51] "template specialization link include" --- paddle/operators/random_op.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/paddle/operators/random_op.h b/paddle/operators/random_op.h index bee6cc9cbd..3eeb1f87c8 100644 --- a/paddle/operators/random_op.h +++ b/paddle/operators/random_op.h @@ -15,6 +15,21 @@ bool Gaussian(DeviceContext& ctx, const T& std, const T& seed); +template +bool Gaussian(platform::CPUDeviceContext& ctx, + framework::Tensor* output, + const int size, + const T& mean, + const T& std, + const T& seed); +template +bool Gaussian(platform::CUDADeviceContext& ctx, + framework::Tensor* output, + const int size, + const T& mean, + const T& std, + const T& seed); + template class RandomOpKernel : public framework::OpKernel { public: From 984225ecf198525a134acbda0fb6cab177a59ebd Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 25 Jul 2017 16:07:08 +0800 Subject: [PATCH 07/51] "fix operator" --- paddle/framework/operator.cc | 14 ++++- paddle/operators/random_op.cc | 23 ++------ paddle/operators/random_op.cu | 13 ----- paddle/operators/random_op.h | 54 +++++++------------ .../paddle/v2/framework/tests/CMakeLists.txt | 3 +- .../v2/framework/tests/test_random_op.py | 7 +-- 6 files changed, 39 insertions(+), 75 deletions(-) diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 1e57e9a20f..18e327089f 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include - #include "paddle/framework/operator.h" +#include +#include namespace paddle { namespace framework { @@ -95,6 +95,16 @@ std::string OperatorBase::DebugString() const { ss << ", "; } } + ss << "), "; + ss << "Attrs:("; + size_t i = 0; + for (auto& attr : attrs_) { + ss << attr.first; + if (i != attrs_.size() - 1) { + ss << ", "; + } + i++; + } ss << ")."; return ss.str(); } diff --git a/paddle/operators/random_op.cc b/paddle/operators/random_op.cc index 05a3dbd9f4..726f6504e7 100644 --- a/paddle/operators/random_op.cc +++ b/paddle/operators/random_op.cc @@ -13,28 +13,12 @@ limitations under the License. 
*/ #include "paddle/operators/random_op.h" +#include "glog/logging.h" #include "paddle/framework/op_registry.h" namespace paddle { namespace operators { -// using paddle::platform::CPUPlace; -// template -template -bool Gaussian(platform::CPUDeviceContext& ctx, - framework::Tensor* output, - const int size, - const T& mean, - const T& std, - const T& seed) { - auto g = ctx.RandGenerator(seed); - std::normal_distribution distribution(mean, std); - for (int i = 0; i < size; ++i) { - output[i] = distribution(g()); - } - return true; -} - class RandomOp : public framework::OperatorWithKernel { protected: void InferShape( @@ -42,11 +26,10 @@ protected: const std::vector& outputs) const override { PADDLE_ENFORCE(inputs.size() == 0, "Input size of RandomOp must be zero."); PADDLE_ENFORCE(outputs.size() == 1, "Output size of RandomOp must be one."); - PADDLE_ENFORCE(inputs[0] != nullptr && outputs[0] != nullptr, - "Inputs/Outputs of RandomOp must all be set."); + PADDLE_ENFORCE(outputs[0] != nullptr, + "Outputs of RandomOp must all be set."); outputs[0]->Resize( framework::make_ddim(this->GetAttr>("shape"))); - // outputs[0]->set_dims(context.op_.attrs_.at("shape")); } }; diff --git a/paddle/operators/random_op.cu b/paddle/operators/random_op.cu index 85054974ac..b417666c98 100644 --- a/paddle/operators/random_op.cu +++ b/paddle/operators/random_op.cu @@ -1,19 +1,6 @@ #include "paddle/operators/random_op.h" #include "paddle/framework/op_registry.h" -namespace paddle { -namespace operators { - -template -bool Gaussian(platform::CUDADeviceContext &ctx, framework::Tensor* output, - const int size, const T& mean, const T& std, const T& seed) { - auto g = RandGenerator(seed); - return curandGenerateNormal(g, output, size, mean, std); -} - -} // operators -} // paddle - typedef paddle::operators::RandomOpKernel RandomOpKernel_GPU_float; diff --git a/paddle/operators/random_op.h b/paddle/operators/random_op.h index 3eeb1f87c8..f8e1a90a1d 100644 --- a/paddle/operators/random_op.h +++ b/paddle/operators/random_op.h @@ -13,7 +13,9 @@ bool Gaussian(DeviceContext& ctx, const int size, const T& mean, const T& std, - const T& seed); + const T& seed) { + return false; +} template bool Gaussian(platform::CPUDeviceContext& ctx, @@ -21,14 +23,27 @@ bool Gaussian(platform::CPUDeviceContext& ctx, const int size, const T& mean, const T& std, - const T& seed); + const T& seed) { + auto g = ctx.RandGenerator(seed); + std::normal_distribution distribution(mean, std); + for (int i = 0; i < size; ++i) { + output[i] = distribution(g); + } + return true; +} + +#ifndef PADDLE_ONLY_CPU template bool Gaussian(platform::CUDADeviceContext& ctx, framework::Tensor* output, const int size, const T& mean, const T& std, - const T& seed); + const T& seed) { + auto g = RandGenerator(seed); + return curandGenerateNormal(g, output, size, mean, std); +} +#endif template class RandomOpKernel : public framework::OpKernel { @@ -45,41 +60,8 @@ public: mean, std, seed); - // Gaussian(context.device_context_, - // output, - // framework::product(output->dims()), - // mean, std, seed); - // std::default_random_engine generator(seed); - // std::normal_distribution distribution(mean, std); - - // framework::EigenMatrix::From(*output).device(*( - // context.GetEigenDevice())) = - // framework::EigenMatrix::Random(); } }; -// using paddle::platform::CPUPlace; -// template -// class RandomOpKernel : public framework::OpKernel { -// public: -// void Compute(const framework::KernelContext& context) const override { - -// std::unique_ptr generator(seed); 
-// for(size_t i=0; i < output->size(); ++i) { -// output[i] = distribution(generator()); -// } -// } - -// }; - -// using paddle::platform::GPUPlace; -// template -// class RandomOpKernel : public framework::OpKernel { -// public: -// void Compute(const framework::KernelContext& context) const override { - -// } -// } - } // namespace operators } // namespace paddle diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index b3eb2ef8a8..254e8d37d1 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -12,4 +12,5 @@ add_python_test(test_framework test_mul_op.py test_sigmoid_op.py test_softmax_op.py - test_rowwise_add_op.py) + test_rowwise_add_op.py + test_random_op.py) diff --git a/python/paddle/v2/framework/tests/test_random_op.py b/python/paddle/v2/framework/tests/test_random_op.py index eb69f35edf..e2aa9bdfc2 100644 --- a/python/paddle/v2/framework/tests/test_random_op.py +++ b/python/paddle/v2/framework/tests/test_random_op.py @@ -15,13 +15,14 @@ class TestRandomOp(unittest.TestCase): if scope.get_var(out) is None: scope.create_var(out).get_tensor() - tensor = scope.get_var("Y").get_tensor() + tensor = scope.get_var("Out").get_tensor() op.infer_shape(scope) self.assertEqual([1000, 1000], tensor.shape()) ctx = core.DeviceContext.cpu_context() op.run(scope, ctx) - self.assertAlmostEqual(numpy.std(tensor), 1.0) - self.assertAlmostEqual(numpy.mean(tensor), 5.0) + tensor_array = numpy.array(tensor) + self.assertAlmostEqual(numpy.std(tensor_array), 1.0) + self.assertAlmostEqual(numpy.mean(tensor_array), 5.0) if __name__ == '__main__': From 11f9f5fb172f620d5221c93fe26196ebd244df79 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 26 Jul 2017 00:40:37 +0800 Subject: [PATCH 08/51] "fix const dependency hell" --- paddle/framework/operator.cc | 4 +-- paddle/framework/operator.h | 14 ++++----- paddle/operators/random_op.h | 49 ++++++++++++++++---------------- paddle/platform/device_context.h | 4 +-- 4 files changed, 36 insertions(+), 35 deletions(-) diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 18e327089f..0a317dffa9 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -22,14 +22,14 @@ namespace framework { template <> Eigen::DefaultDevice* KernelContext::GetEigenDevice< platform::CPUPlace, Eigen::DefaultDevice>() const { - return device_context_.get_eigen_device(); + return device_context_->get_eigen_device(); } #ifndef PADDLE_ONLY_CPU template <> Eigen::GpuDevice* KernelContext::GetEigenDevice() const { - return device_context_.get_eigen_device(); + return device_context_->get_eigen_device(); } #endif diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index f59314f828..5db041ea32 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -88,7 +88,7 @@ class OperatorBase { /// Net will call this function to Run an op. 
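  /// `scope` supplies the variables the op reads and writes; `dev_ctx`
  /// carries the per-device state (CPU or GPU) the kernel executes against.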
virtual void Run(const std::shared_ptr& scope, - const platform::DeviceContext& dev_ctx) const = 0; + platform::DeviceContext& dev_ctx) const = 0; // Get a input with argument's name described in `op_proto` const std::string& Input(const std::string& name) const; @@ -113,8 +113,8 @@ class OperatorBase { class KernelContext { public: KernelContext(const OperatorBase* op, const std::shared_ptr& scope, - const platform::DeviceContext& device_context) - : op_(*op), scope_(scope), device_context_(device_context) {} + platform::DeviceContext& device_context) + : op_(*op), scope_(scope), device_context_(&device_context) {} const Variable* Input(int index) const { return scope_->GetVariable(op_.inputs_[index]); @@ -155,11 +155,11 @@ class KernelContext { typename EigenDeviceConverter::EigenDeviceType> DeviceType* GetEigenDevice() const; - platform::Place GetPlace() const { return device_context_.GetPlace(); } + platform::Place GetPlace() const { return device_context_->GetPlace(); } const OperatorBase& op_; - const std::shared_ptr& scope_; - const platform::DeviceContext& device_context_; + const std::shared_ptr scope_; + platform::DeviceContext* device_context_; }; class OpKernel { @@ -213,7 +213,7 @@ class OperatorWithKernel : public OperatorBase { std::unordered_map, OpKernelHash>; void Run(const std::shared_ptr& scope, - const platform::DeviceContext& dev_ctx) const final { + platform::DeviceContext& dev_ctx) const final { auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); opKernel->Compute(KernelContext(this, scope, dev_ctx)); } diff --git a/paddle/operators/random_op.h b/paddle/operators/random_op.h index f8e1a90a1d..8231b6b613 100644 --- a/paddle/operators/random_op.h +++ b/paddle/operators/random_op.h @@ -7,25 +7,15 @@ namespace paddle { namespace operators { -template -bool Gaussian(DeviceContext& ctx, - framework::Tensor* output, - const int size, - const T& mean, - const T& std, - const T& seed) { - return false; -} - template -bool Gaussian(platform::CPUDeviceContext& ctx, - framework::Tensor* output, +bool Gaussian(platform::CPUDeviceContext* ctx, + T* output, const int size, const T& mean, const T& std, const T& seed) { - auto g = ctx.RandGenerator(seed); - std::normal_distribution distribution(mean, std); + auto g = ctx->RandGenerator(seed); + std::normal_distribution distribution(mean, std); for (int i = 0; i < size; ++i) { output[i] = distribution(g); } @@ -34,13 +24,13 @@ bool Gaussian(platform::CPUDeviceContext& ctx, #ifndef PADDLE_ONLY_CPU template -bool Gaussian(platform::CUDADeviceContext& ctx, - framework::Tensor* output, +bool Gaussian(platform::CUDADeviceContext* ctx, + T* output, const int size, const T& mean, const T& std, const T& seed) { - auto g = RandGenerator(seed); + auto g = ctx->RandGenerator(seed); return curandGenerateNormal(g, output, size, mean, std); } #endif @@ -53,13 +43,24 @@ public: auto std = context.op_.GetAttr("std"); auto seed = context.op_.GetAttr("seed"); auto* output = context.Output(0)->GetMutable(); - output->mutable_data(context.GetPlace()); - Gaussian(context.device_context_, - output, - framework::product(output->dims()), - mean, - std, - seed); + auto place = context.GetPlace(); + if (platform::is_cpu_place(place)) { + Gaussian( + dynamic_cast(context.device_context_), + output->mutable_data(context.GetPlace()), + framework::product(output->dims()), + mean, + std, + seed); + } else { + Gaussian( + dynamic_cast(context.device_context_), + output->mutable_data(context.GetPlace()), + framework::product(output->dims()), + mean, 
+ std, + seed); + } } }; diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index b8af4abd7f..7bc34bd545 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -49,7 +49,7 @@ class CPUDeviceContext : public DeviceContext { return retv; } - const random_generator_type& RandGenerator(const int seed) { + random_generator_type& RandGenerator(const int seed) { if (!rand_generator_) { random_seed_ = seed; rand_generator_.reset(new random_generator_type(random_seed_)); @@ -98,7 +98,7 @@ class CUDADeviceContext : public DeviceContext { "cudaStreamSynchronize failed"); } - const curandGenerator_t RandGenerator(const int seed) { + curandGenerator_t RandGenerator(const int seed) { if (!rand_generator_) { random_seed_ = seed; GPUPlaceGuard guard(gpu_place_); From 9a16327b15cc5c094a3f6373e6bc089d26ee5b00 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 26 Jul 2017 00:44:42 +0800 Subject: [PATCH 09/51] "remove const qualify" --- paddle/framework/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/net.h b/paddle/framework/net.h index 3264f1f565..d3e3e80d5b 100644 --- a/paddle/framework/net.h +++ b/paddle/framework/net.h @@ -71,7 +71,7 @@ class PlainNet : public Net { * will be used. */ void Run(const std::shared_ptr& scope, - const platform::DeviceContext& dev_ctx) const override { + platform::DeviceContext& dev_ctx) const override { for (auto& op : ops_) { op->Run(scope, dev_ctx); } From 69b1b26511d6a838b4542e7844fd13fd257d96b8 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 26 Jul 2017 00:47:22 +0800 Subject: [PATCH 10/51] "cpu only macro" --- paddle/operators/random_op.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/operators/random_op.h b/paddle/operators/random_op.h index 8231b6b613..26dba130e4 100644 --- a/paddle/operators/random_op.h +++ b/paddle/operators/random_op.h @@ -53,6 +53,7 @@ public: std, seed); } else { +#ifndef PADDLE_ONLY_CPU Gaussian( dynamic_cast(context.device_context_), output->mutable_data(context.GetPlace()), @@ -60,6 +61,7 @@ public: mean, std, seed); +#endif } } }; From a22567ebefbd29644603b66f44273bfb33fc8434 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 26 Jul 2017 00:51:46 +0800 Subject: [PATCH 11/51] "fix almost equal error" --- python/paddle/v2/framework/tests/test_random_op.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_random_op.py b/python/paddle/v2/framework/tests/test_random_op.py index e2aa9bdfc2..447e3e39ab 100644 --- a/python/paddle/v2/framework/tests/test_random_op.py +++ b/python/paddle/v2/framework/tests/test_random_op.py @@ -21,8 +21,8 @@ class TestRandomOp(unittest.TestCase): ctx = core.DeviceContext.cpu_context() op.run(scope, ctx) tensor_array = numpy.array(tensor) - self.assertAlmostEqual(numpy.std(tensor_array), 1.0) - self.assertAlmostEqual(numpy.mean(tensor_array), 5.0) + self.assertAlmostEqual(numpy.mean(tensor_array), 5.0, places=3) + self.assertAlmostEqual(numpy.std(tensor_array), 1.0, places=3) if __name__ == '__main__': From 572133400d3f4073d9a9206db5ed1ced3e39623d Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Sun, 30 Jul 2017 22:13:26 +0800 Subject: [PATCH 12/51] "update the compute kernel" --- paddle/framework/operator.h | 8 ++--- paddle/operators/random_op.cc | 47 +++++++++++++++++++------- paddle/operators/random_op.cu | 25 +++++++++++++- paddle/operators/random_op.h | 57 ++------------------------------ paddle/platform/device_context.h | 19 
+++++++---- 5 files changed, 77 insertions(+), 79 deletions(-) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 5db041ea32..9ba661968c 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -88,7 +88,7 @@ class OperatorBase { /// Net will call this function to Run an op. virtual void Run(const std::shared_ptr& scope, - platform::DeviceContext& dev_ctx) const = 0; + const platform::DeviceContext& dev_ctx) const = 0; // Get a input with argument's name described in `op_proto` const std::string& Input(const std::string& name) const; @@ -113,7 +113,7 @@ class OperatorBase { class KernelContext { public: KernelContext(const OperatorBase* op, const std::shared_ptr& scope, - platform::DeviceContext& device_context) + const platform::DeviceContext& device_context) : op_(*op), scope_(scope), device_context_(&device_context) {} const Variable* Input(int index) const { @@ -159,7 +159,7 @@ class KernelContext { const OperatorBase& op_; const std::shared_ptr scope_; - platform::DeviceContext* device_context_; + const platform::DeviceContext* device_context_; }; class OpKernel { @@ -213,7 +213,7 @@ class OperatorWithKernel : public OperatorBase { std::unordered_map, OpKernelHash>; void Run(const std::shared_ptr& scope, - platform::DeviceContext& dev_ctx) const final { + const platform::DeviceContext& dev_ctx) const final { auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); opKernel->Compute(KernelContext(this, scope, dev_ctx)); } diff --git a/paddle/operators/random_op.cc b/paddle/operators/random_op.cc index 726f6504e7..16e526dc4f 100644 --- a/paddle/operators/random_op.cc +++ b/paddle/operators/random_op.cc @@ -19,7 +19,28 @@ namespace paddle { namespace operators { -class RandomOp : public framework::OperatorWithKernel { +template +class GaussianRandomOpKernel + : public framework::OpKernel { +public: + void Compute(const framework::KernelContext& context) const override { + auto mean = context.op_.GetAttr("mean"); + auto std = context.op_.GetAttr("std"); + // auto seed = context.op_.GetAttr("seed"); + auto* output = context.Output(0)->GetMutable(); + T* r = output->mutable_data(context.GetPlace()); + auto ctx = + static_cast(context.device_context_); + // generator need to modify context + auto g = const_cast(ctx)->RandGenerator(); + std::normal_distribution distribution(mean, std); + for (int i = 0; i < framework::product(output->dims()); ++i) { + r[i] = distribution(g); + } + } +}; + +class GaussianRandomOp : public framework::OperatorWithKernel { protected: void InferShape( const std::vector& inputs, @@ -33,20 +54,21 @@ protected: } }; -class RandomOpMaker : public framework::OpProtoAndCheckerMaker { +class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { public: - RandomOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) + GaussianRandomOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddAttr>("shape", "The shape of matrix to be randomized"); - AddAttr("seed", "random seed generator.").SetDefault(1337); + // AddAttr("seed", "random seed generator.").SetDefault(1337); AddAttr("mean", "mean value of random.").SetDefault(.0); AddAttr("std", "minimum value of random value") .SetDefault(1.0) .LargerThan(.0); AddOutput("Out", "output matrix of random op"); AddComment(R"DOC( -Random Operator fill a matrix in normal distribution. 
-The eqution : Out = Random(Shape=(d0, d1, ...), Dtype, mean, std) +GaussianRandom Operator fill a matrix in normal distribution. +The eqution : Out = GaussianRandom(Shape=(d0, d1, ...), Dtype, mean, std) )DOC"); } }; @@ -54,10 +76,11 @@ The eqution : Out = Random(Shape=(d0, d1, ...), Dtype, mean, std) } // namespace operators } // namespace paddle -REGISTER_OP(random, - paddle::operators::RandomOp, - paddle::operators::RandomOpMaker); +REGISTER_OP(gaussian_random, + paddle::operators::GaussianRandomOp, + paddle::operators::GaussianRandomOpMaker); -typedef paddle::operators::RandomOpKernel - RandomOpKernel_CPU_float; -REGISTER_OP_CPU_KERNEL(random, RandomOpKernel_CPU_float); +typedef paddle::operators::GaussianRandomOpKernel + GaussianRandomOpKernel_CPU_float; +REGISTER_OP_CPU_KERNEL(gaussian_random, GaussianRandomOpKernel_CPU_float); diff --git a/paddle/operators/random_op.cu b/paddle/operators/random_op.cu index b417666c98..78a00bc899 100644 --- a/paddle/operators/random_op.cu +++ b/paddle/operators/random_op.cu @@ -1,7 +1,30 @@ #include "paddle/operators/random_op.h" #include "paddle/framework/op_registry.h" +namespace paddle { +namespace operators { + +template +class GaussianRandomOpKernel : public framework::OpKernel { +public: + void Compute(const framework::KernelContext& context) const override { + auto mean = context.op_.GetAttr("mean"); + auto std = context.op_.GetAttr("std"); + auto* output = context.Output(0)->GetMutable(); + T* r = output->mutable_data(context.GetPlace()); + auto ctx = static_cast + (context.device_context_); + // generator need to modify context + auto g = const_cast(ctx)->RandGenerator(); + curandGenerateNormal(g, r, framework::product(output->dims()), mean, std); -typedef paddle::operators::RandomOpKernel + } +}; + +} // namespace operators +} // namespace paddle + + +typedef paddle::operators::GaussianRandomOpKernel RandomOpKernel_GPU_float; REGISTER_OP_GPU_KERNEL(random, RandomOpKernel_GPU_float); \ No newline at end of file diff --git a/paddle/operators/random_op.h b/paddle/operators/random_op.h index 26dba130e4..b463a171d9 100644 --- a/paddle/operators/random_op.h +++ b/paddle/operators/random_op.h @@ -7,63 +7,10 @@ namespace paddle { namespace operators { -template -bool Gaussian(platform::CPUDeviceContext* ctx, - T* output, - const int size, - const T& mean, - const T& std, - const T& seed) { - auto g = ctx->RandGenerator(seed); - std::normal_distribution distribution(mean, std); - for (int i = 0; i < size; ++i) { - output[i] = distribution(g); - } - return true; -} - -#ifndef PADDLE_ONLY_CPU -template -bool Gaussian(platform::CUDADeviceContext* ctx, - T* output, - const int size, - const T& mean, - const T& std, - const T& seed) { - auto g = ctx->RandGenerator(seed); - return curandGenerateNormal(g, output, size, mean, std); -} -#endif - template -class RandomOpKernel : public framework::OpKernel { +class GaussianRandomOpKernel : public framework::OpKernel { public: - void Compute(const framework::KernelContext& context) const override { - auto mean = context.op_.GetAttr("mean"); - auto std = context.op_.GetAttr("std"); - auto seed = context.op_.GetAttr("seed"); - auto* output = context.Output(0)->GetMutable(); - auto place = context.GetPlace(); - if (platform::is_cpu_place(place)) { - Gaussian( - dynamic_cast(context.device_context_), - output->mutable_data(context.GetPlace()), - framework::product(output->dims()), - mean, - std, - seed); - } else { -#ifndef PADDLE_ONLY_CPU - Gaussian( - dynamic_cast(context.device_context_), - 
output->mutable_data(context.GetPlace()), - framework::product(output->dims()), - mean, - std, - seed); -#endif - } - } + void Compute(const framework::KernelContext& context) const override {} }; } // namespace operators diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 7bc34bd545..239c25a90c 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -21,6 +21,7 @@ limitations under the License. */ #include "paddle/platform/gpu_info.h" #define EIGEN_USE_GPU #endif +#include #include #include "paddle/platform/place.h" #include "unsupported/Eigen/CXX11/Tensor" @@ -40,7 +41,10 @@ class DeviceContext { class CPUDeviceContext : public DeviceContext { public: typedef std::mt19937 random_generator_type; - CPUDeviceContext() { eigen_device_.reset(new Eigen::DefaultDevice()); } + CPUDeviceContext() { + random_seed_ = std::chrono::system_clock::now().time_since_epoch().count(); + eigen_device_.reset(new Eigen::DefaultDevice()); + } Eigen::DefaultDevice* eigen_device() const { return eigen_device_.get(); } @@ -49,16 +53,15 @@ class CPUDeviceContext : public DeviceContext { return retv; } - random_generator_type& RandGenerator(const int seed) { + random_generator_type& RandGenerator() { if (!rand_generator_) { - random_seed_ = seed; rand_generator_.reset(new random_generator_type(random_seed_)); } return *rand_generator_.get(); } private: - int random_seed_; + unsigned random_seed_; std::unique_ptr rand_generator_; std::unique_ptr eigen_device_; }; @@ -81,6 +84,9 @@ class GPUPlaceGuard { class CUDADeviceContext : public DeviceContext { public: + CUDADeviceContext() { + random_seed_ = std::chrono::system_clock::now().time_since_epoch().count(); + } explicit CUDADeviceContext(const GPUPlace gpu_place) : gpu_place_(gpu_place) { GPUPlaceGuard guard(gpu_place_); PADDLE_ENFORCE(cudaStreamCreate(&stream_), "cudaStreamCreate failed"); @@ -98,9 +104,8 @@ class CUDADeviceContext : public DeviceContext { "cudaStreamSynchronize failed"); } - curandGenerator_t RandGenerator(const int seed) { + curandGenerator_t RandGenerator() { if (!rand_generator_) { - random_seed_ = seed; GPUPlaceGuard guard(gpu_place_); PADDLE_ENFORCE(paddle::platform::dynload::curandCreateGenerator( &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT), @@ -177,7 +182,7 @@ class CUDADeviceContext : public DeviceContext { cudnnHandle_t dnn_handle_{nullptr}; - int random_seed_; + unsigned random_seed_; curandGenerator_t rand_generator_{nullptr}; }; From 36d7e1fdae2ffc937c29792882d26c492e9f6784 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Sun, 30 Jul 2017 22:20:42 +0800 Subject: [PATCH 13/51] "fix const hell" --- paddle/framework/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/net.h b/paddle/framework/net.h index d3e3e80d5b..3264f1f565 100644 --- a/paddle/framework/net.h +++ b/paddle/framework/net.h @@ -71,7 +71,7 @@ class PlainNet : public Net { * will be used. 
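   * Ops are executed one by one, in the order they were added to the net.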
*/ void Run(const std::shared_ptr& scope, - platform::DeviceContext& dev_ctx) const override { + const platform::DeviceContext& dev_ctx) const override { for (auto& op : ops_) { op->Run(scope, dev_ctx); } From 0253f2c46fcab2410d01004115a16db7f9b393be Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Sun, 30 Jul 2017 22:26:20 +0800 Subject: [PATCH 14/51] "fix bind python error" --- paddle/pybind/pybind.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 1138b07ec4..88deb56207 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -36,7 +36,7 @@ USE_OP(mul); USE_OP(sigmoid); USE_OP(softmax); USE_OP(rowwise_add); -USE_OP(random); +USE_OP(gaussian_random); template void ExposeOperator(ClassType& m) { From 4d8ece8a3c46b0c4ddec77dd3f4f70275c5c274c Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Sun, 30 Jul 2017 22:34:22 +0800 Subject: [PATCH 15/51] "update" --- python/paddle/v2/framework/tests/test_random_op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/test_random_op.py b/python/paddle/v2/framework/tests/test_random_op.py index 447e3e39ab..1c9ff350df 100644 --- a/python/paddle/v2/framework/tests/test_random_op.py +++ b/python/paddle/v2/framework/tests/test_random_op.py @@ -9,7 +9,7 @@ class TestRandomOp(unittest.TestCase): def test_random(self): scope = core.Scope(None) # Out = scope.create_var("Out") - op = creation.op_creations.random( + op = creation.op_creations.gaussian_random( shape=[1000, 1000], mean=5.0, std=1.0, seed=1701, Out="Out") for out in op.outputs(): if scope.get_var(out) is None: From 47556689d4bb53237470bdc9deca5e54df5bda8b Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Sun, 30 Jul 2017 22:38:43 +0800 Subject: [PATCH 16/51] "remove unused code" --- paddle/operators/random_op.cc | 2 -- python/paddle/v2/framework/tests/test_random_op.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/paddle/operators/random_op.cc b/paddle/operators/random_op.cc index 16e526dc4f..674c851345 100644 --- a/paddle/operators/random_op.cc +++ b/paddle/operators/random_op.cc @@ -26,7 +26,6 @@ public: void Compute(const framework::KernelContext& context) const override { auto mean = context.op_.GetAttr("mean"); auto std = context.op_.GetAttr("std"); - // auto seed = context.op_.GetAttr("seed"); auto* output = context.Output(0)->GetMutable(); T* r = output->mutable_data(context.GetPlace()); auto ctx = @@ -60,7 +59,6 @@ public: framework::OpAttrChecker* op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddAttr>("shape", "The shape of matrix to be randomized"); - // AddAttr("seed", "random seed generator.").SetDefault(1337); AddAttr("mean", "mean value of random.").SetDefault(.0); AddAttr("std", "minimum value of random value") .SetDefault(1.0) diff --git a/python/paddle/v2/framework/tests/test_random_op.py b/python/paddle/v2/framework/tests/test_random_op.py index 1c9ff350df..d3474880d3 100644 --- a/python/paddle/v2/framework/tests/test_random_op.py +++ b/python/paddle/v2/framework/tests/test_random_op.py @@ -10,7 +10,7 @@ class TestRandomOp(unittest.TestCase): scope = core.Scope(None) # Out = scope.create_var("Out") op = creation.op_creations.gaussian_random( - shape=[1000, 1000], mean=5.0, std=1.0, seed=1701, Out="Out") + shape=[1000, 1000], mean=5.0, std=1.0, Out="Out") for out in op.outputs(): if scope.get_var(out) is None: scope.create_var(out).get_tensor() From 49739265c728575734afd6079c911f8383d88346 Mon Sep 17 
00:00:00 2001 From: dongzhihong Date: Sun, 30 Jul 2017 22:46:56 +0800 Subject: [PATCH 17/51] "fix register error" --- paddle/operators/random_op.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/operators/random_op.cu b/paddle/operators/random_op.cu index 78a00bc899..c63eafb0a1 100644 --- a/paddle/operators/random_op.cu +++ b/paddle/operators/random_op.cu @@ -27,4 +27,4 @@ public: typedef paddle::operators::GaussianRandomOpKernel RandomOpKernel_GPU_float; -REGISTER_OP_GPU_KERNEL(random, RandomOpKernel_GPU_float); \ No newline at end of file +REGISTER_OP_GPU_KERNEL(gaussian_random, GaussianRandomOpKernel_GPU_float); \ No newline at end of file From 0802197924d884c7d8a9531c541d9d4e4f376885 Mon Sep 17 00:00:00 2001 From: Zhuoyuan Date: Wed, 2 Aug 2017 16:00:06 -0700 Subject: [PATCH 18/51] gather and scatter-update added --- paddle/operators/gather_func.h | 114 ++++++++++++++++++++++++++++++ paddle/operators/scatter_func.h | 119 ++++++++++++++++++++++++++++++++ 2 files changed, 233 insertions(+) create mode 100644 paddle/operators/gather_func.h create mode 100644 paddle/operators/scatter_func.h diff --git a/paddle/operators/gather_func.h b/paddle/operators/gather_func.h new file mode 100644 index 0000000000..09e751ce17 --- /dev/null +++ b/paddle/operators/gather_func.h @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include "paddle/framework/tensor.h" +#include "paddle/platform/place.h" +#include "paddle/framework/ddim.h" + +/** + * Return a new tensor from source tensor, gathered according to index + * input[src]: type-T source Tensor + * input[Index]: type-int index Tensor (1-D) + * return: output tensor + */ +template +Tensor* Gather_func(Tensor* Src, Tensor* Index) { + // assert index is an int-type tensor? 
+ // assert(Index->istype(int)); + + // check index of shape 1-D + assert(Index->dims().size()==1); + int index_size = Index->dims()[0]; + + // Source shape + auto src_dims = Src->dims(); + DDim output_dims(dims_src); + // Create a tensor of shape [index_size, dim_src[1:]] + output_dims[0] = index_size; + + Tensor* New_tensor; + float* output = nullptr; + + /* slice size */ + int slice_size = 1; + for(unsigned int i = 0; i < src_dims.size(); ++i) + slice_size *= src_dims[i]; + + /* Gathering */ + if (place == CPUPlace()) { + // init for CPU + output = New_tensor.mutable_data(output_dims, CPUPlace()); + CPUGather(Src->data(), Index->data(), slice_size, new_tensor->mutable_data()); + } else { // GPU + // init for GPU + output = New_tensor.mutable_data(output_dims, GPUPlace()); + /* how to specialize device??*/ + GPUGather(d, Src->data(), Index->data(), slice_size, new_tensor->mutable_data()); + } + return New_tensor; +} + +/* Implementation of CPU copy */ +template +void CPUGather(const T* params, const int* indices, + const int slice_size, const int index_size, + T* output) { + const size_t slice_bytes = slice_size * sizeof(T); + + for(int i = 0; i < index_size; ++i) + int index_ = indices[i]; + /* copy src[index_] to output[i] */ + memcpy(output + i * slice_bytes, + params + index_ * slice_bytes, + slice_bytes); +} + +/* Implementation of GPU copy: + I suppose the GPUDevice& d, contains gpu_id and thread_id + d = cuda_stream(gpu_id_, stream_id_); +*/ +template +void GPUGather(const GPUDevice& d, + const T* src, const int* Index, + const int slice_size, const int index_size, + T* output) { + int block_count = slice_size * index_size; + int thread_per_block = 1024; + + GatherOpKernel + <<>>( + src, Index, output, slice_size, + indices_size, slice_size, out_size); +} + +template +__global__ void GatherOpKernel(const T* params, const int* indices, T* out, + int64 indices_size, + int64 slice_size, int64 out_size) { + /* I suppose we have the following macro, + which I strongly suggest that we should put in cuda: + #define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + */ + CUDA_1D_KERNEL_LOOP(i, out_size) { + int indices_i = i / slice_size; + int slice_i = i - indices_i * slice_size; // offset inside the slice + int gather_i = indices[indices_i]; + int params_i = gather_i * slice_size + slice_i; + out[i] = *(params + params_i); + } +} diff --git a/paddle/operators/scatter_func.h b/paddle/operators/scatter_func.h new file mode 100644 index 0000000000..6ee3fdf3a3 --- /dev/null +++ b/paddle/operators/scatter_func.h @@ -0,0 +1,119 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include +#include "paddle/framework/tensor.h" +#include "paddle/platform/place.h" +#include "paddle/framework/ddim.h" + +/** + * Return a updated tensor from source tensor, scattered according to index: + * dst[i] += src[index[i]] + * input[src]: type-T source Tensor + * input[Index]: type-int index Tensor (1-D) + * return: output tensor + */ +template +void ScatterUpdate_func(Tensor* Src, Tensor* Dst, Tensor* Index) { + // assert index is an int-type tensor + assert(Index->istype(int)); + + // Source shape + auto src_dims = Src->dims(); + auto dst_dims = Dst->dims(); + DDim output_dims(dims_src); + + // check Src shape and Dst shape should match + for(int i = 1; i < src_dims.size(); i++) + assert(src_dims[i]==dst_dims[i]); + + int index_size = Index->dims()[0]; + + /* slice size */ + int slice_size = 1; + for(unsigned int i = 0; i < src_dims.size(); ++i) + slice_size *= src_dims[i]; + + if (place == CPUPlace()) { + // init + output = new_tensor.mutable_data(output_dims, CPUPlace()); + CPUScatterUpdate(src->data(), index->data(), slice_size, new_tensor->mutable_data()); + + } else { // GPU + // init + output = new_tensor.mutable_data(output_dims, GPUPlace()); + /* how to specialize device??*/ + GPUScatterUpdate(d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); + } +} + +/* Implementation of CPU copy */ +template +void CPUScatterUpdate(const T* src, const int* Index, + const int slice_size, const int index_size, + T* output) { + //const size_t slice_bytes = slice_size * sizeof(T); + + for(int i = 0; i < index_size; ++i) + int index_ = index[i]; + /* dst[index_] += src[index_] + add operation size: slice_size + */ + math::vAdd(slice_size, src + index_ * slice_bytes, + output + i * slice_bytes, + output + i * slice_bytes); + /* Scatter update, not just assign + memcpy(output + i * slice_bytes, + src + index_ * slice_bytes, + slice_bytes); + */ +} + +/* Implementation of GPU scatter: + I suppose the GPUDevice& d, contains gpu_id and thread_id + d = cuda_stream(gpu_id_, stream_id_); +*/ +template +void GPUScatterUpdate(const GPUDevice& d, + const T* src, const int* Index, + const int slice_size, const int index_size, + T* output) { + int block_count = slice_size * index_size; + int thread_per_block = 1024; + + ScatterOpKernel + <<>>( + src, Index, output, slice_size, + indices_size, slice_size, out_size); +} + +template +__global__ void ScatterOpKernel(const T* params, const int* indices, T* out, + int64 indices_size, + int64 slice_size, int64 out_size) { + /* I suppose we have the following macro, + which I strongly suggest that we should put in cuda: + #define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + */ + CUDA_1D_KERNEL_LOOP(i, out_size) { + int indices_i = i / slice_size; + int slice_i = i - indices_i * slice_size; // offset inside the slice + int scatter_i = indices[indices_i]; + int params_i = scatter_i * slice_size + slice_i; + out[i] += *(params + params_i); + } +} From 2b35fca18f66e5f92315e369a687a5e908aedf1e Mon Sep 17 00:00:00 2001 From: Zhuoyuan Date: Wed, 2 Aug 2017 22:34:58 -0700 Subject: [PATCH 19/51] gather modify --- paddle/operators/gather_func.h | 71 ++++++++++++++++------------------ 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/paddle/operators/gather_func.h b/paddle/operators/gather_func.h index 09e751ce17..e255bd7d15 100644 --- a/paddle/operators/gather_func.h +++ b/paddle/operators/gather_func.h @@ -21,44 +21,41 @@ limitations 
under the License. */ /** * Return a new tensor from source tensor, gathered according to index * input[src]: type-T source Tensor - * input[Index]: type-int index Tensor (1-D) + * input[index]: type-int index Tensor (1-D) * return: output tensor */ -template -Tensor* Gather_func(Tensor* Src, Tensor* Index) { - // assert index is an int-type tensor? - // assert(Index->istype(int)); +template +Tensor* Gather(Tensor* src, Tensor* index) { + // check index of shape 1-D + PADDLE_ENFORCE(index->dims().size()==1); + int index_size = index->dims()[0]; - // check index of shape 1-D - assert(Index->dims().size()==1); - int index_size = Index->dims()[0]; + // Source shape + auto src_dims = src->dims(); + DDim output_dims(dims_src); + // Create a tensor of shape [index_size, dim_src[1:]] + output_dims[0] = index_size; - // Source shape - auto src_dims = Src->dims(); - DDim output_dims(dims_src); - // Create a tensor of shape [index_size, dim_src[1:]] - output_dims[0] = index_size; + Tensor* New_tensor; + float* output = nullptr; - Tensor* New_tensor; - float* output = nullptr; + /* slice size */ + int slice_size = 1; + for(unsigned int i = 0; i < src_dims.size(); ++i) + slice_size *= src_dims[i]; - /* slice size */ - int slice_size = 1; - for(unsigned int i = 0; i < src_dims.size(); ++i) - slice_size *= src_dims[i]; - - /* Gathering */ - if (place == CPUPlace()) { - // init for CPU - output = New_tensor.mutable_data(output_dims, CPUPlace()); - CPUGather(Src->data(), Index->data(), slice_size, new_tensor->mutable_data()); - } else { // GPU - // init for GPU - output = New_tensor.mutable_data(output_dims, GPUPlace()); - /* how to specialize device??*/ - GPUGather(d, Src->data(), Index->data(), slice_size, new_tensor->mutable_data()); - } - return New_tensor; + /* Gathering */ + if (place == CPUPlace()) { + // init for CPU + output = New_tensor.mutable_data(output_dims, CPUPlace()); + CPUGather(src->data(), index->data(), slice_size, new_tensor->mutable_data()); + } else { // GPU + // init for GPU + output = New_tensor.mutable_data(output_dims, GPUPlace()); + /* how to specialize device??*/ + GPUGather(d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); + } + return New_tensor; } /* Implementation of CPU copy */ @@ -82,15 +79,15 @@ void CPUGather(const T* params, const int* indices, */ template void GPUGather(const GPUDevice& d, - const T* src, const int* Index, + const T* src, const int* index, const int slice_size, const int index_size, T* output) { - int block_count = slice_size * index_size; - int thread_per_block = 1024; + int block_count = slice_size * index_size; + int thread_per_block = 1024; - GatherOpKernel + GatherOpKernel <<>>( - src, Index, output, slice_size, + src, index, output, slice_size, indices_size, slice_size, out_size); } From eef55ca700a4f75e76996bbab04224470bb80f36 Mon Sep 17 00:00:00 2001 From: Zhuoyuan Date: Thu, 3 Aug 2017 01:02:40 -0700 Subject: [PATCH 20/51] remodify --- paddle/operators/gather_func.h | 76 ++++++++++-------- paddle/operators/scatter_func.h | 137 ++++++++++++++++---------------- 2 files changed, 108 insertions(+), 105 deletions(-) diff --git a/paddle/operators/gather_func.h b/paddle/operators/gather_func.h index e255bd7d15..5975675cbb 100644 --- a/paddle/operators/gather_func.h +++ b/paddle/operators/gather_func.h @@ -14,9 +14,9 @@ limitations under the License. 
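Both GPU kernels in this series "suppose" a CUDA_1D_KERNEL_LOOP helper that is not yet in the tree. For reference, a grid-stride loop of the shape the comments describe, plus a trivial kernel using it (a sketch under that assumption, not a definitive addition):

    // Each thread handles i, i + stride, i + 2*stride, ..., so any launch
    // geometry covers all n elements; extra threads simply do no iterations.
    #define CUDA_1D_KERNEL_LOOP(i, n)                              \
      for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
           i += blockDim.x * gridDim.x)

    template <typename T>
    __global__ void FillKernel(T* out, int n, T value) {
      CUDA_1D_KERNEL_LOOP(i, n) { out[i] = value; }
    }

With this pattern the launch configuration affects only speed, never coverage, so the block_count = slice_size * index_size used above (one block per output element, at 1024 threads each) is safe but heavily oversized; (n + 1023) / 1024 blocks of 1024 threads is the more usual choice.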
*/ #pragma once #include +#include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" -#include "paddle/framework/ddim.h" /** * Return a new tensor from source tensor, gathered according to index @@ -27,7 +27,7 @@ limitations under the License. */ template Tensor* Gather(Tensor* src, Tensor* index) { // check index of shape 1-D - PADDLE_ENFORCE(index->dims().size()==1); + PADDLE_ENFORCE(index->dims().size() == 1); int index_size = index->dims()[0]; // Source shape @@ -41,61 +41,67 @@ Tensor* Gather(Tensor* src, Tensor* index) { /* slice size */ int slice_size = 1; - for(unsigned int i = 0; i < src_dims.size(); ++i) - slice_size *= src_dims[i]; + for (size_t i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; /* Gathering */ if (place == CPUPlace()) { - // init for CPU - output = New_tensor.mutable_data(output_dims, CPUPlace()); - CPUGather(src->data(), index->data(), slice_size, new_tensor->mutable_data()); - } else { // GPU - // init for GPU - output = New_tensor.mutable_data(output_dims, GPUPlace()); - /* how to specialize device??*/ - GPUGather(d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); + // init for CPU + output = New_tensor.mutable_data(output_dims, CPUPlace()); + CPUGather( + src->data(), index->data(), slice_size, new_tensor->mutable_data()); + } else { // GPU + // init for GPU + output = New_tensor.mutable_data(output_dims, GPUPlace()); + /* how to specialize device??*/ + GPUGather( + d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); } return New_tensor; } /* Implementation of CPU copy */ -template -void CPUGather(const T* params, const int* indices, - const int slice_size, const int index_size, - T* output) { +template +void CPUGather(const T* params, + const int* indices, + const int slice_size, + const int index_size, + T* output) { const size_t slice_bytes = slice_size * sizeof(T); - for(int i = 0; i < index_size; ++i) - int index_ = indices[i]; - /* copy src[index_] to output[i] */ - memcpy(output + i * slice_bytes, - params + index_ * slice_bytes, - slice_bytes); + for (size_t i = 0; i < index_size; ++i) { + int index_ = indices[i]; + /* copy src[index_] to output[i] */ + memcpy( + output + i * slice_bytes, params + index_ * slice_bytes, slice_bytes); + } } /* Implementation of GPU copy: I suppose the GPUDevice& d, contains gpu_id and thread_id d = cuda_stream(gpu_id_, stream_id_); */ -template +template void GPUGather(const GPUDevice& d, - const T* src, const int* index, - const int slice_size, const int index_size, - T* output) { + const T* src, + const int* index, + const int slice_size, + const int index_size, + T* output) { int block_count = slice_size * index_size; int thread_per_block = 1024; - GatherOpKernel - <<>>( - src, index, output, slice_size, - indices_size, slice_size, out_size); + GatherOpKernel<<>>( + src, index, output, slice_size, indices_size, slice_size, out_size); } template -__global__ void GatherOpKernel(const T* params, const int* indices, T* out, +__global__ void GatherOpKernel(const T* params, + const int* indices, + T* out, int64 indices_size, - int64 slice_size, int64 out_size) { - /* I suppose we have the following macro, + int64 slice_size, + int64 out_size) { + /* I suppose we have the following macro, which I strongly suggest that we should put in cuda: #define CUDA_1D_KERNEL_LOOP(i, n) \ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ @@ -103,9 +109,9 @@ __global__ void GatherOpKernel(const T* params, const int* indices, T* 
out, */ CUDA_1D_KERNEL_LOOP(i, out_size) { int indices_i = i / slice_size; - int slice_i = i - indices_i * slice_size; // offset inside the slice + int slice_i = i - indices_i * slice_size; // offset inside the slice int gather_i = indices[indices_i]; int params_i = gather_i * slice_size + slice_i; out[i] = *(params + params_i); - } + } } diff --git a/paddle/operators/scatter_func.h b/paddle/operators/scatter_func.h index 6ee3fdf3a3..53b260170f 100644 --- a/paddle/operators/scatter_func.h +++ b/paddle/operators/scatter_func.h @@ -14,96 +14,93 @@ limitations under the License. */ #pragma once #include +#include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" -#include "paddle/framework/ddim.h" /** * Return a updated tensor from source tensor, scattered according to index: * dst[i] += src[index[i]] * input[src]: type-T source Tensor - * input[Index]: type-int index Tensor (1-D) + * input[index]: type-int index Tensor (1-D) * return: output tensor */ -template -void ScatterUpdate_func(Tensor* Src, Tensor* Dst, Tensor* Index) { - // assert index is an int-type tensor - assert(Index->istype(int)); - - // Source shape - auto src_dims = Src->dims(); - auto dst_dims = Dst->dims(); - DDim output_dims(dims_src); - - // check Src shape and Dst shape should match - for(int i = 1; i < src_dims.size(); i++) - assert(src_dims[i]==dst_dims[i]); - - int index_size = Index->dims()[0]; - - /* slice size */ - int slice_size = 1; - for(unsigned int i = 0; i < src_dims.size(); ++i) - slice_size *= src_dims[i]; - - if (place == CPUPlace()) { - // init - output = new_tensor.mutable_data(output_dims, CPUPlace()); - CPUScatterUpdate(src->data(), index->data(), slice_size, new_tensor->mutable_data()); - - } else { // GPU - // init - output = new_tensor.mutable_data(output_dims, GPUPlace()); - /* how to specialize device??*/ - GPUScatterUpdate(d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); - } +template +void ScatterUpdate(Tensor* src, Tensor* dst, Tensor* index) { + // Source shape + auto src_dims = src->dims(); + auto dst_dims = dst->dims(); + DDim output_dims(dims_src); + + // check src shape and dst shape should match + for (size_t i = 1; i < src_dims.size(); i++) + PADDLE_ENFORCE(src_dims[i] == dst_dims[i]); + + int index_size = index->dims()[0]; + + /* slice size */ + int slice_size = 1; + for (size_t i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + + if (place == CPUPlace()) { + // init + output = new_tensor.mutable_data(output_dims, CPUPlace()); + CPUScatterUpdate( + src->data(), index->data(), slice_size, new_tensor->mutable_data()); + + } else { // GPU + // init + output = new_tensor.mutable_data(output_dims, GPUPlace()); + /* how to specialize device??*/ + GPUScatterUpdate( + d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); + } } /* Implementation of CPU copy */ -template -void CPUScatterUpdate(const T* src, const int* Index, - const int slice_size, const int index_size, - T* output) { - //const size_t slice_bytes = slice_size * sizeof(T); - - for(int i = 0; i < index_size; ++i) - int index_ = index[i]; - /* dst[index_] += src[index_] - add operation size: slice_size - */ - math::vAdd(slice_size, src + index_ * slice_bytes, - output + i * slice_bytes, - output + i * slice_bytes); - /* Scatter update, not just assign - memcpy(output + i * slice_bytes, - src + index_ * slice_bytes, - slice_bytes); - */ +template +void CPUScatterUpdate(const T* src, + const int* index, + const int slice_size, + const 
int index_size, + T* output) { + // const size_t slice_bytes = slice_size * sizeof(T); + + for (size_t i = 0; i < index_size; ++i) { + int index_ = index[i]; + math::vAdd(slice_size, + src + index_ * slice_bytes, + output + i * slice_bytes, + output + i * slice_bytes); + } } /* Implementation of GPU scatter: I suppose the GPUDevice& d, contains gpu_id and thread_id d = cuda_stream(gpu_id_, stream_id_); */ -template +template void GPUScatterUpdate(const GPUDevice& d, - const T* src, const int* Index, - const int slice_size, const int index_size, - T* output) { - int block_count = slice_size * index_size; - int thread_per_block = 1024; - - ScatterOpKernel - <<>>( - src, Index, output, slice_size, - indices_size, slice_size, out_size); + const T* src, + const int* index, + const int slice_size, + const int index_size, + T* output) { + int block_count = slice_size * index_size; + int thread_per_block = 1024; + + ScatterOpKernel<<>>( + src, index, output, slice_size, indices_size, slice_size, out_size); } template -__global__ void ScatterOpKernel(const T* params, const int* indices, T* out, - int64 indices_size, - int64 slice_size, int64 out_size) { - /* I suppose we have the following macro, +__global__ void ScatterOpKernel(const T* params, + const int* indices, + T* out, + int64 indices_size, + int64 slice_size, + int64 out_size) { + /* I suppose we have the following macro, which I strongly suggest that we should put in cuda: #define CUDA_1D_KERNEL_LOOP(i, n) \ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ @@ -111,9 +108,9 @@ __global__ void ScatterOpKernel(const T* params, const int* indices, T* out, */ CUDA_1D_KERNEL_LOOP(i, out_size) { int indices_i = i / slice_size; - int slice_i = i - indices_i * slice_size; // offset inside the slice + int slice_i = i - indices_i * slice_size; // offset inside the slice int scatter_i = indices[indices_i]; int params_i = scatter_i * slice_size + slice_i; out[i] += *(params + params_i); - } + } } From 6b3e0b786d9de3ef912953859e23204066aa70a4 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Sat, 5 Aug 2017 15:05:51 -0700 Subject: [PATCH 21/51] gather function with test passed --- paddle/operators/CMakeLists.txt | 5 ++ paddle/operators/gather_func.cc | 19 +++++ paddle/operators/gather_func.h | 124 ++++++++++++++------------------ paddle/operators/gather_test.cc | 50 +++++++++++++ 4 files changed, 126 insertions(+), 72 deletions(-) create mode 100644 paddle/operators/gather_func.cc create mode 100644 paddle/operators/gather_test.cc diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index b910bee836..10922892ca 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -41,6 +41,11 @@ function(op_library TARGET) endif() endfunction() +op_library(gather SRCS gather_func.cc) +cc_test(gather_test SRCS gather_test.cc DEPS gather) + +op_library(scatter SRCS scatter_func.cc) + op_library(add_op SRCS add_op.cc add_op.cu) cc_test(add_op_test SRCS add_op_test.cc DEPS add_op) diff --git a/paddle/operators/gather_func.cc b/paddle/operators/gather_func.cc new file mode 100644 index 0000000000..a6b2331f32 --- /dev/null +++ b/paddle/operators/gather_func.cc @@ -0,0 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/gather_func.h" +#include +#include "paddle/framework/ddim.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/place.h" diff --git a/paddle/operators/gather_func.h b/paddle/operators/gather_func.h index 5975675cbb..5adc1e6b17 100644 --- a/paddle/operators/gather_func.h +++ b/paddle/operators/gather_func.h @@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -13,51 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include + #include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" -/** - * Return a new tensor from source tensor, gathered according to index - * input[src]: type-T source Tensor - * input[index]: type-int index Tensor (1-D) - * return: output tensor - */ -template -Tensor* Gather(Tensor* src, Tensor* index) { - // check index of shape 1-D - PADDLE_ENFORCE(index->dims().size() == 1); - int index_size = index->dims()[0]; - - // Source shape - auto src_dims = src->dims(); - DDim output_dims(dims_src); - // Create a tensor of shape [index_size, dim_src[1:]] - output_dims[0] = index_size; - - Tensor* New_tensor; - float* output = nullptr; - - /* slice size */ - int slice_size = 1; - for (size_t i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; +using paddle::framework::Tensor; +using paddle::framework::DDim; - /* Gathering */ - if (place == CPUPlace()) { - // init for CPU - output = New_tensor.mutable_data(output_dims, CPUPlace()); - CPUGather( - src->data(), index->data(), slice_size, new_tensor->mutable_data()); - } else { // GPU - // init for GPU - output = New_tensor.mutable_data(output_dims, GPUPlace()); - /* how to specialize device??*/ - GPUGather( - d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); - } - return New_tensor; -} +namespace paddle { +namespace operators { /* Implementation of CPU copy */ template @@ -70,48 +37,61 @@ void CPUGather(const T* params, for (size_t i = 0; i < index_size; ++i) { int index_ = indices[i]; - /* copy src[index_] to output[i] */ - memcpy( - output + i * slice_bytes, params + index_ * slice_bytes, slice_bytes); + // copy src[index_] to output[i] + memcpy(output + i * slice_size, params + index_ * slice_size, slice_bytes); } } /* Implementation of GPU copy: - I suppose the GPUDevice& d, contains gpu_id and thread_id - d = cuda_stream(gpu_id_, stream_id_); + I suppose the GPUDevice& d, contains gpu_id and thread_id + d = cuda_stream(gpu_id_, stream_id_); */ template -void GPUGather(const GPUDevice& d, - const T* src, +void GPUGather(const T* src, const int* index, const int slice_size, const int index_size, - T* output) { - int block_count = slice_size * index_size; - int 
thread_per_block = 1024; - - GatherOpKernel<<>>( - src, index, output, slice_size, indices_size, slice_size, out_size); -} + T* output); +/** + * Return a new tensor from source tensor, gathered according to index + * input[src]: type-T source Tensor + * input[index]: type-int index Tensor (1-D) + * return: output tensor + */ template -__global__ void GatherOpKernel(const T* params, - const int* indices, - T* out, - int64 indices_size, - int64 slice_size, - int64 out_size) { - /* I suppose we have the following macro, - which I strongly suggest that we should put in cuda: - #define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - */ - CUDA_1D_KERNEL_LOOP(i, out_size) { - int indices_i = i / slice_size; - int slice_i = i - indices_i * slice_size; // offset inside the slice - int gather_i = indices[indices_i]; - int params_i = gather_i * slice_size + slice_i; - out[i] = *(params + params_i); +void Gather(const platform::Place& place, + const paddle::framework::Tensor* src, + const paddle::framework::Tensor* index, + paddle::framework::Tensor* output) { + // check index of shape 1-D + PADDLE_ENFORCE(index->dims().size() == 1); + int index_size = index->dims()[0]; + + auto src_dims = src->dims(); + DDim output_dims(src_dims); + output_dims[0] = index_size; + + // slice size + int slice_size = 1; + for (size_t i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + + // Gathering + if (platform::is_cpu_place(place)) { + CPUGather(src->data(), + index->data(), + slice_size, + index_size, + output->data()); + } else { + // init for GPU + // output_arr = output->mutable_data(output_dims, platform::GPUPlace()); + // how to specialize device?? + // GPUGather( + // d, src->data(), index->data(), slice_size, + // new_tensor->mutable_data()); } } + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc new file mode 100644 index 0000000000..6f220b133b --- /dev/null +++ b/paddle/operators/gather_test.cc @@ -0,0 +1,50 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/framework/ddim.h" +#include "paddle/framework/tensor.h" +#include "paddle/operators/gather_func.h" +#include "paddle/platform/place.h" + +#include +#include +#include + +TEST(_abc_, GatherData) { + using namespace paddle::framework; + using namespace paddle::platform; + using namespace paddle::operators; + + Tensor* src = new Tensor(); + Tensor* index = new Tensor(); + Tensor* output = new Tensor(); + // src.Resize(make_ddim({3, 4})); + + int* p_src = nullptr; + int* p_index = nullptr; + p_src = src->mutable_data(make_ddim({3, 4}), CPUPlace()); + p_index = index->mutable_data(make_ddim({2}), CPUPlace()); + + for (size_t i = 0; i < 12; ++i) p_src[i] = i; + p_index[0] = 1; + p_index[1] = 0; + + // gather + int* p_output = output->mutable_data(make_ddim({2, 4}), CPUPlace()); + + Gather(CPUPlace(), src, index, output); + + for (size_t i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4); + for (size_t i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4); +} From 0f8c9dbe61762092a701ac035445dbae31b27338 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Sun, 6 Aug 2017 15:37:36 +0800 Subject: [PATCH 22/51] device context pointer --- paddle/operators/CMakeLists.txt | 2 +- paddle/operators/gaussian_random_op.cc | 11 +++++------ paddle/operators/gaussian_random_op.cu | 26 +++++++++++++------------- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 8887dc6dbd..3b60df0218 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -55,7 +55,7 @@ op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu) op_library(softmax_op SRCS softmax_op.cc softmax_op.cu) -op_library(guassian_random_op SRCS guassain_random_op.cc guassian_random_op.cu) +op_library(gaussian_random_op SRCS gaussian_random_op.cc gaussian_random_op.cu) op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu) op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 7afc0cd56b..f5fd902c5f 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -12,9 +12,9 @@ See the License for the specific language governing permissions and limitations under the License. 
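The CPU kernel that follows reduces to seeding a standard-library engine and drawing from std::normal_distribution. The same fill, distilled to a free function over a raw array (illustration only; the real kernel reads mean, std, and seed from operator attributes and writes into the output tensor):

    #include <random>

    // Fills data[0..size) with draws from N(mean, stddev). A zero seed falls
    // back to a nondeterministic std::random_device seed, as the kernel does.
    void GaussianFill(float* data, int size, float mean, float stddev,
                      unsigned int seed) {
      if (seed == 0) seed = std::random_device()();
      std::mt19937 g(seed);
      std::normal_distribution<float> dist(mean, stddev);
      for (int i = 0; i < size; ++i) data[i] = dist(g);
    }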
*/ +#include "paddle/operators/gaussian_random_op.h" #include "glog/logging.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/random_op.h" namespace paddle { namespace operators { @@ -22,7 +22,7 @@ namespace operators { template class GaussianRandomOpKernel : public framework::OpKernel { -public: + public: void Compute(const framework::KernelContext& context) const override { auto mean = context.op_.GetAttr("mean"); auto std = context.op_.GetAttr("std"); @@ -40,7 +40,7 @@ public: }; class GaussianRandomOp : public framework::OperatorWithKernel { -protected: + protected: void InferShape( const std::vector& inputs, const std::vector& outputs) const override { @@ -54,7 +54,7 @@ protected: }; class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { -public: + public: GaussianRandomOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { @@ -74,8 +74,7 @@ The eqution : Out = GaussianRandom(Shape=(d0, d1, ...), Dtype, mean, std) } // namespace operators } // namespace paddle -REGISTER_OP(gaussian_random, - paddle::operators::GaussianRandomOp, +REGISTER_OP(gaussian_random, paddle::operators::GaussianRandomOp, paddle::operators::GaussianRandomOpMaker); typedef paddle::operators::GaussianRandomOpKernel -class GaussianRandomOpKernel : public framework::OpKernel { -public: + +template +class GaussianRandomOpKernel + : public framework::OpKernel { + public: void Compute(const framework::KernelContext& context) const override { auto mean = context.op_.GetAttr("mean"); auto std = context.op_.GetAttr("std"); auto* output = context.Output(0)->GetMutable(); T* r = output->mutable_data(context.GetPlace()); - auto ctx = static_cast - (context.device_context_); - // generator need to modify context + auto ctx = + static_cast(context.device_context_); + // generator need to modify context auto g = const_cast(ctx)->RandGenerator(); curandGenerateNormal(g, r, framework::product(output->dims()), mean, std); - } }; - + } // namespace operators } // namespace paddle - -typedef paddle::operators::GaussianRandomOpKernel - RandomOpKernel_GPU_float; +typedef paddle::operators::GaussianRandomOpKernel + RandomOpKernel_GPU_float; REGISTER_OP_GPU_KERNEL(gaussian_random, GaussianRandomOpKernel_GPU_float); \ No newline at end of file From fcd6f64b98aafdb13d29395eaa3573f69632382a Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 7 Aug 2017 17:28:38 +0800 Subject: [PATCH 23/51] "redefine random op" --- paddle/operators/gaussian_random_op.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index f5fd902c5f..d7ced6b526 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -23,7 +23,7 @@ template class GaussianRandomOpKernel : public framework::OpKernel { public: - void Compute(const framework::KernelContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto mean = context.op_.GetAttr("mean"); auto std = context.op_.GetAttr("std"); auto* output = context.Output(0)->GetMutable(); @@ -41,15 +41,14 @@ class GaussianRandomOpKernel class GaussianRandomOp : public framework::OperatorWithKernel { protected: - void InferShape( - const std::vector& inputs, - const std::vector& outputs) const override { + void InferShape(const framework::InferShapeContext& ctx) const override { PADDLE_ENFORCE(inputs.size() == 0, 
"Input size of RandomOp must be zero."); PADDLE_ENFORCE(outputs.size() == 1, "Output size of RandomOp must be one."); PADDLE_ENFORCE(outputs[0] != nullptr, "Outputs of RandomOp must all be set."); - outputs[0]->Resize( - framework::make_ddim(this->GetAttr>("shape"))); + auto* tensor = ctx.Output(0); + auto dims = GetAttr(std::vector("shape")); + tensor->Resize(framework::make_ddim(dims)); } }; From 7e3747131899685b7b058241576a1e2e96f172ea Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Mon, 7 Aug 2017 14:26:47 -0700 Subject: [PATCH 24/51] modify --- paddle/operators/CMakeLists.txt | 5 +- paddle/operators/{gather_func.h => gather.h} | 0 paddle/operators/gather_func.cc | 19 --- paddle/operators/gather_test.cc | 4 +- paddle/operators/scatter_func.h | 116 ------------------- 5 files changed, 3 insertions(+), 141 deletions(-) rename paddle/operators/{gather_func.h => gather.h} (100%) delete mode 100644 paddle/operators/gather_func.cc delete mode 100644 paddle/operators/scatter_func.h diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 10922892ca..a2284fc8f0 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -41,10 +41,7 @@ function(op_library TARGET) endif() endfunction() -op_library(gather SRCS gather_func.cc) -cc_test(gather_test SRCS gather_test.cc DEPS gather) - -op_library(scatter SRCS scatter_func.cc) +cc_test(gather_test SRCS gather_test.cc DEPS tensor) op_library(add_op SRCS add_op.cc add_op.cu) cc_test(add_op_test SRCS add_op_test.cc DEPS add_op) diff --git a/paddle/operators/gather_func.h b/paddle/operators/gather.h similarity index 100% rename from paddle/operators/gather_func.h rename to paddle/operators/gather.h diff --git a/paddle/operators/gather_func.cc b/paddle/operators/gather_func.cc deleted file mode 100644 index a6b2331f32..0000000000 --- a/paddle/operators/gather_func.cc +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/operators/gather_func.h" -#include -#include "paddle/framework/ddim.h" -#include "paddle/framework/tensor.h" -#include "paddle/platform/place.h" diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc index 6f220b133b..5d84b7b5f3 100644 --- a/paddle/operators/gather_test.cc +++ b/paddle/operators/gather_test.cc @@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/operators/gather.h" #include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" -#include "paddle/operators/gather_func.h" #include "paddle/platform/place.h" #include #include #include -TEST(_abc_, GatherData) { +TEST(Gather, GatherData) { using namespace paddle::framework; using namespace paddle::platform; using namespace paddle::operators; diff --git a/paddle/operators/scatter_func.h b/paddle/operators/scatter_func.h deleted file mode 100644 index 53b260170f..0000000000 --- a/paddle/operators/scatter_func.h +++ /dev/null @@ -1,116 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include "paddle/framework/ddim.h" -#include "paddle/framework/tensor.h" -#include "paddle/platform/place.h" - -/** - * Return a updated tensor from source tensor, scattered according to index: - * dst[i] += src[index[i]] - * input[src]: type-T source Tensor - * input[index]: type-int index Tensor (1-D) - * return: output tensor - */ -template -void ScatterUpdate(Tensor* src, Tensor* dst, Tensor* index) { - // Source shape - auto src_dims = src->dims(); - auto dst_dims = dst->dims(); - DDim output_dims(dims_src); - - // check src shape and dst shape should match - for (size_t i = 1; i < src_dims.size(); i++) - PADDLE_ENFORCE(src_dims[i] == dst_dims[i]); - - int index_size = index->dims()[0]; - - /* slice size */ - int slice_size = 1; - for (size_t i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; - - if (place == CPUPlace()) { - // init - output = new_tensor.mutable_data(output_dims, CPUPlace()); - CPUScatterUpdate( - src->data(), index->data(), slice_size, new_tensor->mutable_data()); - - } else { // GPU - // init - output = new_tensor.mutable_data(output_dims, GPUPlace()); - /* how to specialize device??*/ - GPUScatterUpdate( - d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); - } -} - -/* Implementation of CPU copy */ -template -void CPUScatterUpdate(const T* src, - const int* index, - const int slice_size, - const int index_size, - T* output) { - // const size_t slice_bytes = slice_size * sizeof(T); - - for (size_t i = 0; i < index_size; ++i) { - int index_ = index[i]; - math::vAdd(slice_size, - src + index_ * slice_bytes, - output + i * slice_bytes, - output + i * slice_bytes); - } -} - -/* Implementation of GPU scatter: - I suppose the GPUDevice& d, contains gpu_id and thread_id - d = cuda_stream(gpu_id_, stream_id_); -*/ -template -void GPUScatterUpdate(const GPUDevice& d, - const T* src, - const int* index, - const int slice_size, - const int index_size, - T* output) { - int block_count = slice_size * index_size; - int thread_per_block = 1024; - - ScatterOpKernel<<>>( - src, index, output, slice_size, indices_size, slice_size, out_size); -} - -template -__global__ void ScatterOpKernel(const T* params, - const int* indices, - T* out, - int64 indices_size, - int64 slice_size, - int64 out_size) { - /* I suppose we have the following macro, - which I strongly suggest 
that we should put in cuda: - #define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - */ - CUDA_1D_KERNEL_LOOP(i, out_size) { - int indices_i = i / slice_size; - int slice_i = i - indices_i * slice_size; // offset inside the slice - int scatter_i = indices[indices_i]; - int params_i = scatter_i * slice_size + slice_i; - out[i] += *(params + params_i); - } -} From e2c08d286fe8f230cb20ab5b6de6601d93feb82c Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 8 Aug 2017 16:21:21 +0800 Subject: [PATCH 25/51] "keep style same with uniform operators" --- paddle/operators/gaussian_random_op.cc | 69 +++++++++++++------------- paddle/operators/gaussian_random_op.cu | 43 +++++++++------- paddle/operators/gaussian_random_op.h | 17 ------- 3 files changed, 60 insertions(+), 69 deletions(-) delete mode 100644 paddle/operators/gaussian_random_op.h diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index d7ced6b526..b0b68ff36d 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -12,42 +12,42 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/gaussian_random_op.h" -#include "glog/logging.h" +#include #include "paddle/framework/op_registry.h" namespace paddle { namespace operators { template -class GaussianRandomOpKernel - : public framework::OpKernel { +class GaussianRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto mean = context.op_.GetAttr("mean"); - auto std = context.op_.GetAttr("std"); - auto* output = context.Output(0)->GetMutable(); - T* r = output->mutable_data(context.GetPlace()); - auto ctx = - static_cast(context.device_context_); - // generator need to modify context - auto g = const_cast(ctx)->RandGenerator(); + T mean = static_cast(context.op_.GetAttr("mean")); + T std = static_cast(context.op_.GetAttr("std")); + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + + // TODO(dzh): attribute does not support unsigned int. + // And we need a global random seed configuration. + int seed = context.op_.GetAttr("seed"); + if (seed == 0) { + seed = std::random_device()(); + } + std::mt19937 g(seed); std::normal_distribution distribution(mean, std); - for (int i = 0; i < framework::product(output->dims()); ++i) { - r[i] = distribution(g); + for (int i = 0; i < framework::product(tensor->dims()); ++i) { + data[i] = distribution(g); } } }; class GaussianRandomOp : public framework::OperatorWithKernel { protected: - void InferShape(const framework::InferShapeContext& ctx) const override { - PADDLE_ENFORCE(inputs.size() == 0, "Input size of RandomOp must be zero."); - PADDLE_ENFORCE(outputs.size() == 1, "Output size of RandomOp must be one."); - PADDLE_ENFORCE(outputs[0] != nullptr, - "Outputs of RandomOp must all be set."); - auto* tensor = ctx.Output(0); - auto dims = GetAttr(std::vector("shape")); + void InferShape(const framework::InferShapeContext& context) const override { + auto* tensor = context.Output(0); + auto dims = GetAttr>("dims"); + PADDLE_ENFORCE(dims.size() > 0UL, + "dims can be one int or array. 
dims must be set."); tensor->Resize(framework::make_ddim(dims)); } }; @@ -57,26 +57,25 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { GaussianRandomOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddAttr>("shape", "The shape of matrix to be randomized"); - AddAttr("mean", "mean value of random.").SetDefault(.0); - AddAttr("std", "minimum value of random value") - .SetDefault(1.0) - .LargerThan(.0); AddOutput("Out", "output matrix of random op"); AddComment(R"DOC( -GaussianRandom Operator fill a matrix in normal distribution. -The eqution : Out = GaussianRandom(Shape=(d0, d1, ...), Dtype, mean, std) +GaussianRandom operator. +Use to initialize tensor with gaussian random generator. )DOC"); + + AddAttr>("dims", "The dimension of random tensor."); + AddAttr("mean", "mean value of random.").SetDefault(.0f); + AddAttr("std", "minimum value of random value.").SetDefault(1.0f); + AddAttr("seed", + "Random seed of generator." + "0 means use system wide seed") + .SetDefault(0); } }; } // namespace operators } // namespace paddle -REGISTER_OP(gaussian_random, paddle::operators::GaussianRandomOp, - paddle::operators::GaussianRandomOpMaker); - -typedef paddle::operators::GaussianRandomOpKernel - GaussianRandomOpKernel_CPU_float; -REGISTER_OP_CPU_KERNEL(gaussian_random, GaussianRandomOpKernel_CPU_float); +namespace ops = paddle::operators; +REGISTER_OP(gaussian_random, ops::GaussianRandomOp, ops::GaussianRandomOpMaker); +REGISTER_OP_CPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index afede378f6..a408d2aa79 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -1,30 +1,39 @@ +#include +#include +#include "paddle/platform/dynload/curand.h" +#include "paddle/platform/gpu_info.h" + #include "paddle/framework/op_registry.h" -#include "paddle/operators/guassian_random_op.h" namespace paddle { namespace operators { template -class GaussianRandomOpKernel - : public framework::OpKernel { +class GaussianRandomKernel : public framework::OpKernel { public: - void Compute(const framework::KernelContext& context) const override { - auto mean = context.op_.GetAttr("mean"); - auto std = context.op_.GetAttr("std"); - auto* output = context.Output(0)->GetMutable(); - T* r = output->mutable_data(context.GetPlace()); - auto ctx = - static_cast(context.device_context_); - // generator need to modify context - auto g = const_cast(ctx)->RandGenerator(); - curandGenerateNormal(g, r, framework::product(output->dims()), mean, std); + void Compute(const framework::ExecutionContext& context) const override { + T mean = static_cast(context.op_.GetAttr("mean")); + T std = static_cast(context.op_.GetAttr("std")); + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + + int seed = context.op_.GetAttr("seed"); + if (seed == 0) { + seed = std::random_device()(); + } + curandGenerator_t g; + PADDLE_ENFORCE(platform::dynload::curandCreateGenerator( + &g, CURAND_RNG_PSEUDO_DEFAULT)); + PADDLE_ENFORCE( + platform::dynload::curandSetPseudoRandomGeneratorSeed(g, seed)); + // auto g = const_cast(ctx)->RandGenerator(); + curandGenerateNormal(g, data, framework::product(tensor->dims()), mean, + std); } }; } // namespace operators } // namespace paddle -typedef paddle::operators::GaussianRandomOpKernel - RandomOpKernel_GPU_float; 
-REGISTER_OP_GPU_KERNEL(gaussian_random, GaussianRandomOpKernel_GPU_float); \ No newline at end of file +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); \ No newline at end of file diff --git a/paddle/operators/gaussian_random_op.h b/paddle/operators/gaussian_random_op.h deleted file mode 100644 index b463a171d9..0000000000 --- a/paddle/operators/gaussian_random_op.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once -#include -#include "glog/logging.h" -#include "paddle/framework/eigen.h" -#include "paddle/framework/operator.h" - -namespace paddle { -namespace operators { - -template -class GaussianRandomOpKernel : public framework::OpKernel { -public: - void Compute(const framework::KernelContext& context) const override {} -}; - -} // namespace operators -} // namespace paddle From 52d2ebdaef66f980c8ecb4878d41da6b44467115 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 8 Aug 2017 16:40:21 +0800 Subject: [PATCH 26/51] "test gaussian random in python side" --- paddle/framework/CMakeLists.txt | 1 + paddle/framework/pybind.cc | 1 + .../paddle/v2/framework/tests/CMakeLists.txt | 2 ++ .../tests/test_gaussian_random_op.py | 33 +++++++++++++++++++ .../v2/framework/tests/test_random_op.py | 29 ---------------- 5 files changed, 37 insertions(+), 29 deletions(-) create mode 100644 python/paddle/v2/framework/tests/test_gaussian_random_op.py delete mode 100644 python/paddle/v2/framework/tests/test_random_op.py diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 1db042c6fc..9b96a59189 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -43,4 +43,5 @@ cc_library(paddle_pybind SHARED add_op mean_op cross_entropy_op + gaussian_random_op recurrent_op) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index cbb86c4195..85548e3e91 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -41,6 +41,7 @@ USE_OP(sigmoid); USE_OP(softmax); USE_OP(rowwise_add); USE_OP_WITHOUT_KERNEL(recurrent_op); +USE_OP(gaussian_random); namespace paddle { namespace framework { template diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 7eec376788..5a89984118 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -21,3 +21,5 @@ py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) py_test(test_op_creation_methods SRCS test_op_creation_methods.py) + +py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py new file mode 100644 index 0000000000..020e69fe14 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py @@ -0,0 +1,33 @@ +import unittest +import paddle.v2.framework.core as core +import paddle.v2.framework.op as Operator +import numpy + + +class GaussianRandomTest(unittest.TestCase): + def test_cpu(self): + self.test_gaussian_random(place=core.CPUPlace()) + + def test_gpu(self): + self.test_gaussian_random(place=core.GPUPlace(0)) + + def test_gaussian_random(self, place): + scope = core.Scope() + scope.new_var("Out").get_tensor() + op = Operator( + "gaussian_random", + Out="Out", + dims=[1000, 784], + mean=.0, + std=1., + seed=0) + op.infer_shape(scope) + context = core.DeviceContext.create(place) + 
op.run(scope, context) + tensor = numpy.array(scope.find_var("Out").get_tensor()) + self.assertAlmostEqual(numpy.mean(tensor), .0, places=3) + self.assertAlmostEqual(numpy.std(tensor), 1., places=3) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_random_op.py b/python/paddle/v2/framework/tests/test_random_op.py deleted file mode 100644 index d3474880d3..0000000000 --- a/python/paddle/v2/framework/tests/test_random_op.py +++ /dev/null @@ -1,29 +0,0 @@ -import unittest -import paddle.v2.framework.create_op_creation_methods as creation -import paddle.v2.framework.core as core -from op_test_util import OpTestMeta -import numpy - - -class TestRandomOp(unittest.TestCase): - def test_random(self): - scope = core.Scope(None) - # Out = scope.create_var("Out") - op = creation.op_creations.gaussian_random( - shape=[1000, 1000], mean=5.0, std=1.0, Out="Out") - for out in op.outputs(): - if scope.get_var(out) is None: - scope.create_var(out).get_tensor() - - tensor = scope.get_var("Out").get_tensor() - op.infer_shape(scope) - self.assertEqual([1000, 1000], tensor.shape()) - ctx = core.DeviceContext.cpu_context() - op.run(scope, ctx) - tensor_array = numpy.array(tensor) - self.assertAlmostEqual(numpy.mean(tensor_array), 5.0, places=3) - self.assertAlmostEqual(numpy.std(tensor_array), 1.0, places=3) - - -if __name__ == '__main__': - unittest.main() From 555af4d0f6a02c88f9503cb6ffdd5c5e66e2e9b4 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 8 Aug 2017 17:21:04 +0800 Subject: [PATCH 27/51] "format code" --- paddle/framework/CMakeLists.txt | 6 +++--- paddle/operators/gaussian_random_op.cu | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 55d4bc428e..1ebab6b8ab 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -38,9 +38,9 @@ cc_test(backward_test SRCS backward_test.cc DEPS backward) if(WITH_PYTHON) cc_library(paddle_pybind SHARED - SRCS pybind.cc - DEPS pybind python backward - fc_op + SRCS pybind.cc + DEPS pybind python backward + fc_op sgd_op add_op mean_op diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index a408d2aa79..164753f946 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
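A tolerance note on test_gaussian_random_op.py above: with dims=[1000, 784] the op draws n = 784,000 samples, so under a normal approximation the standard error of the sample mean is 1/sqrt(784000) ≈ 1.1e-3, while assertAlmostEqual(..., places=3) only tolerates differences below 5e-4 — about 0.44 standard errors. Since seed=0 selects a nondeterministic seed, the mean check can be expected to fail a substantial fraction of runs; a fixed nonzero seed (or a looser places/delta) would make it deterministic.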
*/ + #include #include #include "paddle/platform/dynload/curand.h" From ba3f3f56893c7d7ad6d8cf8de019ca82cbf53ead Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 8 Aug 2017 18:50:08 +0800 Subject: [PATCH 28/51] Ignore compile generated files for git --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c84b2fc8c7..9622ab78e0 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,5 @@ cmake-build-* python/paddle/v2/framework/core.so CMakeFiles cmake_install.cmake - +paddle/.timestamp +python/paddlepaddle.egg-info/ From d98e299d3b9977819afbf9db53a97c5c0bbbaa68 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 9 Aug 2017 00:16:55 +0800 Subject: [PATCH 29/51] "keep same with uniform random op" --- python/paddle/v2/framework/tests/test_gaussian_random_op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py index 020e69fe14..0ff8c89a14 100644 --- a/python/paddle/v2/framework/tests/test_gaussian_random_op.py +++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py @@ -1,6 +1,6 @@ import unittest import paddle.v2.framework.core as core -import paddle.v2.framework.op as Operator +from paddle.v2.framework.op import Operator import numpy From 70825506d1561d53c1efdfc5a50ef8cb8a4c4f9f Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 9 Aug 2017 00:28:04 +0800 Subject: [PATCH 30/51] "remove context random seeding " --- paddle/platform/device_context.cc | 7 ++----- paddle/platform/device_context.h | 14 ++------------ 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index f80c36b5b2..a928e09778 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -21,12 +21,10 @@ Eigen::DefaultDevice* DeviceContext::get_eigen_device() } CPUDeviceContext::CPUDeviceContext() { - random_seed_ = std::chrono::system_clock::now().time_since_epoch().count(); eigen_device_.reset(new Eigen::DefaultDevice()); } CPUDeviceContext::CPUDeviceContext(CPUPlace place) { - random_seed_ = std::chrono::system_clock::now().time_since_epoch().count(); eigen_device_.reset(new Eigen::DefaultDevice()); } @@ -44,7 +42,6 @@ Eigen::GpuDevice* DeviceContext::get_eigen_device() const { } CUDADeviceContext::CUDADeviceContext(GPUPlace place) : place_(place) { - random_seed_ = std::chrono::system_clock::now().time_since_epoch().count(); SetDeviceId(place_.device); // TODO(qijun) Pass a created cuda stream to Eigen::CudaStreamDevice directly // here will cause segment fault. We must implement a class derived from @@ -111,8 +108,8 @@ curandGenerator_t CUDADeviceContext::curand_generator() { SetDeviceId(place_.device); PADDLE_ENFORCE(dynload::curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); - PADDLE_ENFORCE(dynload::curandSetPseudoRandomGeneratorSeed( - curand_generator_, random_seed_)); + PADDLE_ENFORCE( + dynload::curandSetPseudoRandomGeneratorSeed(curand_generator_, seed_)); } return curand_generator_; } diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 450213c34a..08b5b2cff9 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -21,7 +21,6 @@ limitations under the License. 
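The device-context change in PATCH 30 above moves seeding out of the context constructors: the context now lazily builds a single cuRAND generator from a stored seed_ on first use. The pattern, condensed (framework plumbing and PADDLE_ENFORCE error checks omitted; assumes the standard cuRAND host API):

    #include <cstdint>
    #include <curand.h>

    // Creates the generator on first call and seeds it exactly once; later
    // calls reuse it. Return codes should be checked in real code.
    curandGenerator_t GetGenerator(curandGenerator_t* gen, uint64_t seed) {
      if (*gen == nullptr) {
        curandCreateGenerator(gen, CURAND_RNG_PSEUDO_DEFAULT);
        curandSetPseudoRandomGeneratorSeed(*gen, seed);
      }
      return *gen;
    }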
*/ #include "paddle/platform/gpu_info.h" #define EIGEN_USE_GPU #endif -#include #include #include "paddle/platform/place.h" #include "unsupported/Eigen/CXX11/Tensor" @@ -40,7 +39,6 @@ class DeviceContext { class CPUDeviceContext : public DeviceContext { public: - typedef std::mt19937 random_generator_type; CPUDeviceContext(); explicit CPUDeviceContext(CPUPlace); virtual ~CPUDeviceContext() {} @@ -49,16 +47,7 @@ class CPUDeviceContext : public DeviceContext { Place GetPlace() const override; - random_generator_type& RandGenerator() { - if (!rand_generator_) { - rand_generator_.reset(new random_generator_type(random_seed_)); - } - return *rand_generator_.get(); - } - private: - unsigned random_seed_; - std::unique_ptr rand_generator_; std::unique_ptr eigen_device_; }; @@ -97,7 +86,8 @@ class CUDADeviceContext : public DeviceContext { std::unique_ptr eigen_stream_; private: - unsigned random_seed_; + uint64_t seed_; + // clang-format off cudnnHandle_t cudnn_handle_ = nullptr; cublasHandle_t cublas_handle_ = nullptr; From b8461c79fc72d3fc3ed8d0815058e12385eaa4d1 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Tue, 8 Aug 2017 00:00:06 +0000 Subject: [PATCH 31/51] implement init parameters selection with etcd --- go/glide.lock | 10 +- go/master/service_test.go | 4 +- go/pserver/client/c/cclient.go | 20 ++- go/pserver/client/client.go | 10 +- go/pserver/client/client_test.go | 14 +- go/pserver/client/etcd_client.go | 153 ++++++++++++++++--- go/pserver/client/etcd_client_test.go | 97 ++++++++++++ paddle/trainer/NewRemoteParameterUpdater.cpp | 2 +- 8 files changed, 269 insertions(+), 41 deletions(-) create mode 100644 go/pserver/client/etcd_client_test.go diff --git a/go/glide.lock b/go/glide.lock index be1fb24d77..1ecdd21752 100644 --- a/go/glide.lock +++ b/go/glide.lock @@ -1,5 +1,5 @@ hash: 1b9b07408ca7fac27a374dc2ccd2433e4bff090484008a037df967284949a582 -updated: 2017-08-03T21:46:51.744995189Z +updated: 2017-08-07T23:37:48.867469328Z imports: - name: github.com/beorn7/perks version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9 @@ -10,7 +10,7 @@ imports: - name: github.com/cockroachdb/cmux version: 112f0506e7743d64a6eb8fedbcff13d9979bbf92 - name: github.com/coreos/etcd - version: c31bec0f29facff13f7c3e3d948e55dd6689ed42 + version: d0d1a87aa96ae14914751d42264262cb69eda170 subpackages: - alarm - auth @@ -24,6 +24,7 @@ imports: - error - etcdserver - etcdserver/api + - etcdserver/api/etcdhttp - etcdserver/api/v2http - etcdserver/api/v2http/httptypes - etcdserver/api/v3client @@ -210,11 +211,6 @@ testImports: version: 04cdfd42973bb9c8589fd6a731800cf222fde1a9 subpackages: - spew -- name: github.com/docker/docker - version: b6d164e6c46d8115b146e4c3ac93784e9ef8b49e - subpackages: - - pkg/ioutils - - pkg/longpath - name: github.com/pmezard/go-difflib version: d8ed2627bdf02c080bf22230dbb337003b7aba2d subpackages: diff --git a/go/master/service_test.go b/go/master/service_test.go index 5f91910ecc..87ce9c9043 100644 --- a/go/master/service_test.go +++ b/go/master/service_test.go @@ -1,6 +1,7 @@ package master_test import ( + "io/ioutil" "os" "testing" "time" @@ -8,13 +9,12 @@ import ( "github.com/PaddlePaddle/Paddle/go/master" "github.com/coreos/etcd/clientv3" "github.com/coreos/etcd/embed" - "github.com/docker/docker/pkg/ioutils" "github.com/stretchr/testify/assert" ) func TestNewServiceWithEtcd(t *testing.T) { // setup an embed etcd server - etcdDir, err := ioutils.TempDir("", "") + etcdDir, err := ioutil.TempDir("", "") if err != nil { t.Fatal(err) } diff --git a/go/pserver/client/c/cclient.go 
b/go/pserver/client/c/cclient.go index 14ad077455..a49cd01522 100644 --- a/go/pserver/client/c/cclient.go +++ b/go/pserver/client/c/cclient.go @@ -90,8 +90,12 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte { type selector bool -func (s selector) Select() bool { - return bool(s) +func (s selector) Select() (bool, error) { + return bool(s), nil +} + +func (s selector) Done() error { + return nil } type lister []client.Server @@ -114,11 +118,10 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli } //export paddle_new_etcd_pserver_client -func paddle_new_etcd_pserver_client(etcdEndpoints *C.char, selected int) C.paddle_pserver_client { - // TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters) +func paddle_new_etcd_pserver_client(etcdEndpoints *C.char) C.paddle_pserver_client { addr := C.GoString(etcdEndpoints) etcdClient := client.NewEtcd(addr) - c := client.NewClient(etcdClient, etcdClient.Desired(), selector(selected != 0)) + c := client.NewClient(etcdClient, etcdClient.Desired(), etcdClient) return add(c) } @@ -136,7 +139,12 @@ func paddle_pserver_client_release(client C.paddle_pserver_client) { //export paddle_begin_init_params func paddle_begin_init_params(client C.paddle_pserver_client) C.int { c := get(client) - if selected := c.BeginInitParams(); selected { + selected, err := c.BeginInitParams() + if err != nil { + panic(err) + } + + if selected { return 1 } return 0 diff --git a/go/pserver/client/client.go b/go/pserver/client/client.go index 15adda4735..20d91e7703 100644 --- a/go/pserver/client/client.go +++ b/go/pserver/client/client.go @@ -27,9 +27,13 @@ import ( // TODO(helin): add RPC call retry logic -// Selector selects if the client should initialize parameter servers. +// Selector selects if the client should initialize parameters and +// reports the initialization process done. type Selector interface { - Select() bool + // Select selects if the client should initialize parameter servers. + Select() (bool, error) + // Done indicates the initialization process is done. + Done() error } // Server is the identification of a parameter Server. @@ -115,7 +119,7 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) { // servers. Other trainers will be blocked until the initialization is // done, and they need to get the initialized parameters from // parameter servers using GetParams. 
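From the trainer's C API the selection below is invisible: every trainer calls paddle_begin_init_params, and only the winner of the etcd lock sees a nonzero return. A hedged usage sketch built from the exports shown in cclient.go above; paddle_init_param and paddle_finish_init_params are assumed companions that this diff does not show:

    paddle_pserver_client c = paddle_new_etcd_pserver_client("127.0.0.1:2379");
    if (paddle_begin_init_params(c)) {
      // Selected: push initial values for every parameter, then finish.
      // paddle_init_param(c, ...);          // assumed API
      // paddle_finish_init_params(c);       // assumed API; maps to Done()
    } else {
      // Another trainer initialized; fetch parameters when training starts.
    }
    paddle_pserver_client_release(c);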
-func (c *Client) BeginInitParams() bool { +func (c *Client) BeginInitParams() (bool, error) { return c.sel.Select() } diff --git a/go/pserver/client/client_test.go b/go/pserver/client/client_test.go index 1243ebd683..c3d88e926d 100644 --- a/go/pserver/client/client_test.go +++ b/go/pserver/client/client_test.go @@ -124,8 +124,12 @@ func initEtcdClient() { type selector bool -func (s selector) Select() bool { - return bool(s) +func (s selector) Select() (bool, error) { + return bool(s), nil +} + +func (s selector) Done() error { + return nil } type lister []client.Server @@ -135,7 +139,11 @@ func (l lister) List() []client.Server { } func testClient(t *testing.T, c *client.Client) { - selected := c.BeginInitParams() + selected, err := c.BeginInitParams() + if err != nil { + t.Fatal(err) + } + if !selected { t.Fatal("should be selected.") } diff --git a/go/pserver/client/etcd_client.go b/go/pserver/client/etcd_client.go index 977ae5af37..f9071caaa8 100644 --- a/go/pserver/client/etcd_client.go +++ b/go/pserver/client/etcd_client.go @@ -16,53 +16,60 @@ package client import ( "context" + "errors" + "fmt" "strconv" "strings" "time" "github.com/PaddlePaddle/Paddle/go/pserver" "github.com/coreos/etcd/clientv3" + "github.com/coreos/etcd/clientv3/concurrency" log "github.com/sirupsen/logrus" ) const ( defaultEtcdTimeout time.Duration = 5 * time.Second + + initLockPath = "/init_ps/lock" + initDonePath = "/init_ps/done" + initDoneVal = "1" ) -// EtcdClient is used by pserver client that is a part of trainer process. +// Etcd is used by pserver client that is a part of trainer process. // TODO: -// 1. add watcher to watch the change state of pservers) -// 1. add etcd lock) -type EtcdClient struct { +// 1. add watcher to watch the change state of pservers. +type Etcd struct { client *clientv3.Client timeout time.Duration endpoints []string + lock *concurrency.Mutex } // Desired read ps desired number from etcd. -func (p *EtcdClient) Desired() int { +func (e *Etcd) Desired() int { var psDesired int for { - ctx, cancel := context.WithTimeout(context.Background(), p.timeout) - resp, err := p.client.Get(ctx, pserver.PsDesired) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + resp, err := e.client.Get(ctx, pserver.PsDesired) cancel() if err != nil { log.Errorf("Get ps dresire number failed! recnnectiong..., %v", err) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } kvs := resp.Kvs if len(kvs) == 0 { log.Infoln("Waiting for ps desired registered ...") - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value)) if err != nil { log.Errorf("psDesired %d invalid %v", psDesired, err) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } @@ -73,26 +80,26 @@ func (p *EtcdClient) Desired() int { } // List return the pserver list read from etcd. 
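For orientation, the etcd layout these methods rely on: pserver.PsDesired holds the expected pserver count, and pserver.PsPath + i (one key per index 0 .. desired-1) holds each server's address. Both Desired and List poll with the client timeout as the retry interval until every key is populated, so a trainer simply blocks until the whole cluster has registered.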
-func (p *EtcdClient) List() []Server { - psDesired := p.Desired() +func (e *Etcd) List() []Server { + psDesired := e.Desired() servers := make([]Server, psDesired) for { for i := 0; i < psDesired; i++ { - ctx, cancel := context.WithTimeout(context.Background(), p.timeout) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) psKey := pserver.PsPath + strconv.Itoa(i) log.Debugf("checking %s", psKey) - resp, err := p.client.Get(ctx, psKey) + resp, err := e.client.Get(ctx, psKey) cancel() if err != nil { log.Infof("Get psKey= %s error, %v", psKey, err) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } kvs := resp.Kvs if len(kvs) == 0 { log.Infof("Waiting for ps addr registered ...") - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } @@ -100,7 +107,7 @@ func (p *EtcdClient) List() []Server { // TODO(Longfei) check the ps address if psAddr == "" { log.Infof("Get psKey = %s, psAddr is empty", psKey) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } log.Debugf("got value (%s) for key: %s", psAddr, psKey) @@ -113,7 +120,7 @@ func (p *EtcdClient) List() []Server { } // NewEtcd create a etcd client to return the state of pserver on etcd. -func NewEtcd(endpoints string) *EtcdClient { +func NewEtcd(endpoints string) *Etcd { ep := strings.Split(endpoints, ",") var cli *clientv3.Client var err error @@ -130,10 +137,118 @@ func NewEtcd(endpoints string) *EtcdClient { break } log.Infof("Connected to etcd: %s\n", endpoints) - client := &EtcdClient{ + client := &Etcd{ client: cli, timeout: defaultEtcdTimeout, endpoints: ep, } return client } + +// Select indicates if the current trainer is selected to initialize +// the pserver parameters. +func (e *Etcd) Select() (bool, error) { + sess, err := concurrency.NewSession(e.client, concurrency.WithTTL(5)) + if err != nil { + return false, err + } + + lock := concurrency.NewMutex(sess, initLockPath) + log.Infof("Trying to acquire lock at %s.", initLockPath) + // Do not use timeout context here, since we don't know how + // long does it take for other trainers to initialize the + // parameters. + err = lock.Lock(context.Background()) + if err != nil { + return false, err + } + log.Infof("Successfully acquired lock at %s.", initLockPath) + + get := clientv3.OpGet(initDonePath) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + tresp, err := e.client.Txn(ctx).If(lock.IsOwner()).Then(get).Commit() + cancel() + if err != nil { + return false, err + } + + if !tresp.Succeeded { + return false, errors.New("no longer the owner of the lock") + } + + resp := tresp.Responses[0].GetResponseRange() + + if len(resp.Kvs) == 0 { + // Key value not set, select current trainer. + e.lock = lock + log.Infoln("Trainer selected.") + return true, nil + } + + if string(resp.Kvs[0].Value) == initDoneVal { + log.Infoln("Initialization is already done.") + ctx, cancel = context.WithTimeout(context.Background(), e.timeout) + err = lock.Unlock(ctx) + cancel() + if err != nil { + log.Errorln(err) + } + return false, nil + } + + return false, fmt.Errorf("key %s have unexpected value: %v", initDonePath, resp.Kvs[0].Value) +} + +// Done indicates the parameter initialization process is done. 
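Select() above folds three outcomes into one transaction that is valid only while the session still owns the init lock: done key absent means this trainer is selected and keeps the lock; key equal to initDoneVal means initialization already happened, so unlock and skip; anything else is an error. That decision in isolation, as a C++ sketch with "1" standing in for initDoneVal:

#include <string>

enum class SelectOutcome { kSelected, kAlreadyDone, kUnexpected };

// The branch structure guarded by lock.IsOwner() in Etcd::Select (sketch).
SelectOutcome Decide(bool still_lock_owner, bool done_key_exists,
                     const std::string& done_value) {
  if (!still_lock_owner) return SelectOutcome::kUnexpected;   // lost the lock
  if (!done_key_exists) return SelectOutcome::kSelected;      // keep lock, init
  if (done_value == "1") return SelectOutcome::kAlreadyDone;  // unlock, skip
  return SelectOutcome::kUnexpected;  // unexpected value under initDonePath
}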
+func (e *Etcd) Done() error { + if e.lock == nil { + return errors.New("lock is nil, Done called unexpectedly") + } + + put := clientv3.OpPut(initDonePath, initDoneVal) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + tresp, err := e.client.Txn(ctx).If(e.lock.IsOwner()).Then(put).Commit() + cancel() + if err != nil { + return err + } + + if !tresp.Succeeded { + return errors.New("no longer the owner of the lock") + } + + ctx, cancel = context.WithTimeout(context.Background(), e.timeout) + err = e.lock.Unlock(ctx) + cancel() + if err != nil { + log.Errorln(err) + } else { + e.lock = nil + } + + return nil +} + +// Close closes the etcd client. +func (e *Etcd) Close() error { + var err error + if e.lock != nil { + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + err = e.lock.Unlock(ctx) + cancel() + if err == nil { + e.lock = nil + } + } + + cErr := e.client.Close() + if cErr != nil { + if err != nil { + log.Errorln(cErr) + return err + } + return cErr + } + + return err +} diff --git a/go/pserver/client/etcd_client_test.go b/go/pserver/client/etcd_client_test.go new file mode 100644 index 0000000000..9daeb97d4c --- /dev/null +++ b/go/pserver/client/etcd_client_test.go @@ -0,0 +1,97 @@ +package client_test + +import ( + "io/ioutil" + "os" + "sync" + "testing" + + "github.com/PaddlePaddle/Paddle/go/pserver/client" + "github.com/coreos/etcd/embed" +) + +func TestSelector(t *testing.T) { + etcdDir, err := ioutil.TempDir("", "") + if err != nil { + t.Fatal(err) + } + cfg := embed.NewConfig() + cfg.Dir = etcdDir + e, err := embed.StartEtcd(cfg) + if err != nil { + t.Fatal(err) + } + + defer func() { + e.Close() + if err := os.RemoveAll(etcdDir); err != nil { + t.Fatal(err) + } + }() + + <-e.Server.ReadyNotify() + + var mu sync.Mutex + selectedCount := 0 + var wg sync.WaitGroup + selectAndDone := func(c *client.Etcd) { + defer wg.Done() + + selected, err := c.Select() + if err != nil { + panic(err) + } + + if selected { + mu.Lock() + selectedCount++ + mu.Unlock() + err = c.Done() + if err != nil { + t.Fatal(err) + } + } + } + + c0 := client.NewEtcd("127.0.0.1:2379") + c1 := client.NewEtcd("127.0.0.1:2379") + c2 := client.NewEtcd("127.0.0.1:2379") + c3 := client.NewEtcd("127.0.0.1:2379") + wg.Add(3) + go selectAndDone(c0) + go selectAndDone(c1) + go selectAndDone(c2) + wg.Wait() + + // simulate trainer crashed and restarted after the + // initialization process. + wg.Add(1) + go selectAndDone(c3) + wg.Wait() + + mu.Lock() + if selectedCount != 1 { + t.Fatal("selected count wrong:", selectedCount) + } + mu.Unlock() + + err = c0.Close() + if err != nil { + t.Fatal(err) + } + + err = c1.Close() + if err != nil { + t.Fatal(err) + } + + err = c2.Close() + if err != nil { + t.Fatal(err) + } + + err = c3.Close() + if err != nil { + t.Fatal(err) + } +} diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp index e1558e3fdf..31bbab3aef 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.cpp +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -51,7 +51,7 @@ void NewRemoteParameterUpdater::init( // create parameter server client. 
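TestSelector above spins up three concurrent trainers plus one late "restarted" trainer and asserts that exactly one is ever selected. Across machines that guarantee comes from the etcd lock and the done key; within a single process the same invariant reduces to one compare-and-swap. A self-contained C++ sketch of the invariant itself:

#include <atomic>
#include <cassert>
#include <thread>
#include <vector>

int main() {
  std::atomic<bool> selected{false};
  std::atomic<int> selected_count{0};
  std::vector<std::thread> trainers;
  for (int i = 0; i < 4; ++i) {
    trainers.emplace_back([&] {
      bool expected = false;
      // Only the first CAS winner "initializes"; later arrivals see true.
      if (selected.compare_exchange_strong(expected, true)) {
        selected_count.fetch_add(1);
      }
    });
  }
  for (auto& t : trainers) t.join();
  assert(selected_count.load() == 1);
  return 0;
}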
if (useEtcd_) { parameterClient_ = paddle_new_etcd_pserver_client( - (char *)pserverSpec_.c_str(), FLAGS_trainer_id == 0); + (char *)pserverSpec_.c_str()); } else { parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(), FLAGS_trainer_id == 0); From 34add153def8470afaae78f8a722d526c559f5b1 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Tue, 8 Aug 2017 14:06:20 -0400 Subject: [PATCH 32/51] fix style check --- paddle/trainer/NewRemoteParameterUpdater.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp index 31bbab3aef..af1dceed02 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.cpp +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -50,8 +50,8 @@ void NewRemoteParameterUpdater::init( // create parameter server client. if (useEtcd_) { - parameterClient_ = paddle_new_etcd_pserver_client( - (char *)pserverSpec_.c_str()); + parameterClient_ = + paddle_new_etcd_pserver_client((char *)pserverSpec_.c_str()); } else { parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(), FLAGS_trainer_id == 0); From f64539bef9acbf18cc28083499fb2c46daf646c7 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Tue, 8 Aug 2017 22:20:32 +0000 Subject: [PATCH 33/51] use random port for embed etcd to avoid port collision --- go/master/service_test.go | 20 ++++++++++++-------- go/pserver/client/etcd_client_test.go | 17 +++++++++++++---- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/go/master/service_test.go b/go/master/service_test.go index 87ce9c9043..2d00c22d6f 100644 --- a/go/master/service_test.go +++ b/go/master/service_test.go @@ -2,7 +2,9 @@ package master_test import ( "io/ioutil" + "net/url" "os" + "strings" "testing" "time" @@ -19,6 +21,10 @@ func TestNewServiceWithEtcd(t *testing.T) { t.Fatal(err) } cfg := embed.NewConfig() + lpurl, _ := url.Parse("http://localhost:0") + lcurl, _ := url.Parse("http://localhost:0") + cfg.LPUrls = []url.URL{*lpurl} + cfg.LCUrls = []url.URL{*lcurl} cfg.Dir = etcdDir e, err := embed.StartEtcd(cfg) if err != nil { @@ -30,15 +36,13 @@ func TestNewServiceWithEtcd(t *testing.T) { t.Fatal(err) } }() - select { - case <-e.Server.ReadyNotify(): - t.Log("Server is ready!") - case <-time.After(60 * time.Second): - e.Server.Stop() // trigger a shutdown - t.Fatal("Server took too long to start!") - } - ep := []string{"127.0.0.1:2379"} + <-e.Server.ReadyNotify() + + port := strings.Split(e.Clients[0].Addr().String(), ":")[1] + endpoint := "127.0.0.1:" + port + + ep := []string{endpoint} masterAddr := "127.0.0.1:3306" store, err := master.NewEtcdClient(ep, masterAddr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, 30) if err != nil { diff --git a/go/pserver/client/etcd_client_test.go b/go/pserver/client/etcd_client_test.go index 9daeb97d4c..08742433e7 100644 --- a/go/pserver/client/etcd_client_test.go +++ b/go/pserver/client/etcd_client_test.go @@ -2,7 +2,9 @@ package client_test import ( "io/ioutil" + "net/url" "os" + "strings" "sync" "testing" @@ -16,6 +18,10 @@ func TestSelector(t *testing.T) { t.Fatal(err) } cfg := embed.NewConfig() + lpurl, _ := url.Parse("http://localhost:0") + lcurl, _ := url.Parse("http://localhost:0") + cfg.LPUrls = []url.URL{*lpurl} + cfg.LCUrls = []url.URL{*lcurl} cfg.Dir = etcdDir e, err := embed.StartEtcd(cfg) if err != nil { @@ -31,6 +37,9 @@ func TestSelector(t *testing.T) { <-e.Server.ReadyNotify() + port := strings.Split(e.Clients[0].Addr().String(), ":")[1] + 
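The listen URLs http://localhost:0 in this patch hand port selection to the kernel, and the test then reads the real port back from e.Clients[0].Addr(). The same trick at the plain socket level, as a POSIX C++ sketch (assumes a Unix-like platform; error handling trimmed for brevity):

#include <arpa/inet.h>
#include <cstdio>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main() {
  int fd = socket(AF_INET, SOCK_STREAM, 0);
  sockaddr_in addr{};
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  addr.sin_port = 0;  // port 0: the kernel assigns any free port
  bind(fd, reinterpret_cast<sockaddr*>(&addr), sizeof(addr));
  socklen_t len = sizeof(addr);
  getsockname(fd, reinterpret_cast<sockaddr*>(&addr), &len);
  std::printf("kernel-assigned port: %d\n", ntohs(addr.sin_port));
  close(fd);
  return 0;
}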
endpoint := "127.0.0.1:" + port + var mu sync.Mutex selectedCount := 0 var wg sync.WaitGroup @@ -53,10 +62,10 @@ func TestSelector(t *testing.T) { } } - c0 := client.NewEtcd("127.0.0.1:2379") - c1 := client.NewEtcd("127.0.0.1:2379") - c2 := client.NewEtcd("127.0.0.1:2379") - c3 := client.NewEtcd("127.0.0.1:2379") + c0 := client.NewEtcd(endpoint) + c1 := client.NewEtcd(endpoint) + c2 := client.NewEtcd(endpoint) + c3 := client.NewEtcd(endpoint) wg.Add(3) go selectAndDone(c0) go selectAndDone(c1) From 6159f5db14b580fab0386fdbe258b26c892be257 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Tue, 8 Aug 2017 16:38:51 -0700 Subject: [PATCH 34/51] code style fix --- paddle/operators/gather.h | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h index 5adc1e6b17..8b02156545 100644 --- a/paddle/operators/gather.h +++ b/paddle/operators/gather.h @@ -28,11 +28,8 @@ namespace operators { /* Implementation of CPU copy */ template -void CPUGather(const T* params, - const int* indices, - const int slice_size, - const int index_size, - T* output) { +void CPUGather(const T* params, const int* indices, const int slice_size, + const int index_size, T* output) { const size_t slice_bytes = slice_size * sizeof(T); for (size_t i = 0; i < index_size; ++i) { @@ -47,11 +44,8 @@ void CPUGather(const T* params, d = cuda_stream(gpu_id_, stream_id_); */ template -void GPUGather(const T* src, - const int* index, - const int slice_size, - const int index_size, - T* output); +void GPUGather(const T* src, const int* index, const int slice_size, + const int index_size, T* output); /** * Return a new tensor from source tensor, gathered according to index @@ -60,8 +54,7 @@ void GPUGather(const T* src, * return: output tensor */ template -void Gather(const platform::Place& place, - const paddle::framework::Tensor* src, +void Gather(const platform::Place& place, const paddle::framework::Tensor* src, const paddle::framework::Tensor* index, paddle::framework::Tensor* output) { // check index of shape 1-D @@ -78,10 +71,7 @@ void Gather(const platform::Place& place, // Gathering if (platform::is_cpu_place(place)) { - CPUGather(src->data(), - index->data(), - slice_size, - index_size, + CPUGather(src->data(), index->data(), slice_size, index_size, output->data()); } else { // init for GPU From 54cda76fe8058daefa9f2cb24efe19b035162bf5 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Tue, 8 Aug 2017 17:02:55 -0700 Subject: [PATCH 35/51] Refactorize enforece_test.cc --- Dockerfile | 2 +- paddle/platform/CMakeLists.txt | 2 +- paddle/platform/enforce_test.cc | 144 +++++++++++--------------------- 3 files changed, 51 insertions(+), 97 deletions(-) diff --git a/Dockerfile b/Dockerfile index 06a3d89307..8ac123bf9c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,7 +28,7 @@ RUN apt-get update && \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ python-matplotlib gcc-4.8 g++-4.8 \ - automake locales clang-format-3.8 swig doxygen cmake \ + automake locales clang-format swig doxygen cmake \ liblapack-dev liblapacke-dev libboost-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \ net-tools && \ diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index bd77bb7daa..4154aad15c 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -8,7 +8,7 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags) add_subdirectory(dynload) -cc_test(enforce_test SRCS 
enforce_test.cc) +cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece) IF(WITH_GPU) set(GPU_CTX_DEPS dynload_cuda dynamic_loader) diff --git a/paddle/platform/enforce_test.cc b/paddle/platform/enforce_test.cc index 4dfb697546..5408fce558 100644 --- a/paddle/platform/enforce_test.cc +++ b/paddle/platform/enforce_test.cc @@ -13,6 +13,10 @@ limitations under the License. */ #include "gtest/gtest.h" #include "paddle/platform/enforce.h" +#include "paddle/string/piece.h" + +using StringPiece = paddle::string::Piece; +using paddle::string::HasPrefix; TEST(ENFORCE, OK) { PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); @@ -22,19 +26,15 @@ TEST(ENFORCE, OK) { } TEST(ENFORCE, FAILED) { - bool in_catch = false; + bool caught_exception = false; try { PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123); } catch (paddle::platform::EnforceNotMet error) { - // your error handling code here - in_catch = true; - std::string msg = "Enforce is not ok 123 at all"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "Enforce is not ok 123 at all")); } - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE, NO_ARG_OK) { @@ -47,41 +47,27 @@ TEST(ENFORCE, NO_ARG_OK) { TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) { int a = 2; - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_EQ(a, 1 + 3); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce a == 1 + 3 failed, 2 != 4"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + HasPrefix(StringPiece(error.what()), "enforce a == 1 + 3 failed, 2 != 4"); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) { int a = 2; - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_EQ(a, 1 + 3, "%s size not match", "their"); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = - "enforce a == 1 + 3 failed, 2 != 4\ntheir size not match"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + HasPrefix(StringPiece(error.what()), + "enforce a == 1 + 3 failed, 2 != 4\ntheir size not match"); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_NE, OK) { @@ -89,42 +75,32 @@ TEST(ENFORCE_NE, OK) { PADDLE_ENFORCE_NE(1.0, 2UL); } TEST(ENFORCE_NE, FAIL) { - bool in_catch = false; + bool caught_exception = false; try { // 2UL here to check data type compatible PADDLE_ENFORCE_NE(1.0, 1UL); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1.0 != 1UL failed, 1.000000 == 1"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE(HasPrefix(StringPiece(error.what()), + "enforce 1.0 != 1UL failed, 1.000000 == 1")) + << error.what() << " does not have expected prefix"; } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_GT, OK) { PADDLE_ENFORCE_GT(2, 1); } TEST(ENFORCE_GT, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { - // 2UL here to check data type compatible PADDLE_ENFORCE_GT(1, 2UL); } catch 
(paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_GE, OK) { @@ -134,21 +110,16 @@ TEST(ENFORCE_GE, OK) { PADDLE_ENFORCE_GE(3.21, 2UL); } TEST(ENFORCE_GE, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_GE(1, 2UL); } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1 >= 2UL failed, 1 < 2"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "enforce 1 >= 2UL failed, 1 < 2")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_LE, OK) { @@ -159,21 +130,16 @@ TEST(ENFORCE_LE, OK) { PADDLE_ENFORCE_LE(2UL, 3.2); } TEST(ENFORCE_LE, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_GT(1, 2UL); } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_LT, OK) { @@ -182,21 +148,15 @@ TEST(ENFORCE_LT, OK) { PADDLE_ENFORCE_LT(2UL, 3); } TEST(ENFORCE_LT, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_LT(1UL, 0.12); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1UL < 0.12 failed, 1 >= 0.12"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE(HasPrefix(StringPiece(error.what()), + "enforce 1UL < 0.12 failed, 1 >= 0.12")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_NOT_NULL, OK) { @@ -205,20 +165,14 @@ TEST(ENFORCE_NOT_NULL, OK) { delete a; } TEST(ENFORCE_NOT_NULL, FAIL) { - bool in_catch = false; - int* a{nullptr}; - + bool caught_exception = false; try { + int* a = nullptr; PADDLE_ENFORCE_NOT_NULL(a); } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "a should not be null"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE(HasPrefix(StringPiece(error.what()), "a should not be null")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } From ede02d7d795f6b3fb237c95276fb3c5749c7f216 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Wed, 9 Aug 2017 09:05:35 +0800 Subject: [PATCH 36/51] LODTensor (Level of details, or Level of sequences Tensor). 
(#3109) * add lodtensor * add reshape of lod * add details * rename Elements/Levels * size_t and vector reserve * add details * add const& std::shared_ptr * add lod_tensor_impl.h * remove a shared_ptr --- paddle/framework/CMakeLists.txt | 3 + paddle/framework/details/lod_tensor.cc | 62 ++++++++++ paddle/framework/details/lod_tensor.h | 46 +++++++ paddle/framework/lod_tensor.cc | 51 ++++++++ paddle/framework/lod_tensor.h | 145 ++++++++++++++++++++++ paddle/framework/lod_tensor_impl.h | 60 +++++++++ paddle/framework/lod_tensor_test.cc | 165 +++++++++++++++++++++++++ paddle/framework/tensor.h | 2 + paddle/framework/tensor_test.cc | 2 +- 9 files changed, 535 insertions(+), 1 deletion(-) create mode 100644 paddle/framework/details/lod_tensor.cc create mode 100644 paddle/framework/details/lod_tensor.h create mode 100644 paddle/framework/lod_tensor.cc create mode 100644 paddle/framework/lod_tensor.h create mode 100644 paddle/framework/lod_tensor_impl.h create mode 100644 paddle/framework/lod_tensor_test.cc diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 33e6baf818..6601918c90 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -7,6 +7,9 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) +cc_library(lod_tensor SRCS lod_tensor.cc details/lod_tensor.cc DEPS ddim place tensor) +cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor) + cc_test(variable_test SRCS variable_test.cc) cc_library(scope SRCS scope.cc) diff --git a/paddle/framework/details/lod_tensor.cc b/paddle/framework/details/lod_tensor.cc new file mode 100644 index 0000000000..9ad3979e5b --- /dev/null +++ b/paddle/framework/details/lod_tensor.cc @@ -0,0 +1,62 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/lod_tensor.h" + +#include + +namespace paddle { +namespace framework { +namespace details { + +using LOD = LODTensor::LOD; + +std::shared_ptr SliceLOD(const LOD &lod, size_t level_begin, + size_t level_end) { + auto new_lod = std::make_shared(); + new_lod->reserve(level_end - level_begin); + for (size_t i = level_begin; i < level_end; i++) { + new_lod->emplace_back(lod[i]); + } + return new_lod; +} + +std::shared_ptr SliceLOD(const LOD &lod, size_t level, size_t elem_begin, + size_t elem_end, bool tensor_shared) { + // slice the lod. 
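When the sliced tensor is copied rather than shared, the code below shifts every retained offset by the slice's start so the new LOD begins at zero again. That offset arithmetic on a single level, as a standalone C++ sketch:

#include <algorithm>
#include <vector>

// Keep offsets [elem_begin, elem_end] of one LOD level; when the tensor is
// copied (not shared), rebase them so the sliced LOD starts at zero.
std::vector<int> SliceLevel(const std::vector<int>& level, size_t elem_begin,
                            size_t elem_end, bool rebase) {
  std::vector<int> out(level.begin() + elem_begin,
                       level.begin() + elem_end + 1);
  if (rebase) {
    const int start = out.front();
    std::transform(out.begin(), out.end(), out.begin(),
                   [start](int v) { return v - start; });
  }
  return out;
}
// e.g. level {0, 5, 10, 15, 20} with elem_begin=1, elem_end=3 keeps
// {5, 10, 15}; rebased it becomes {0, 5, 10}, matching the
// SliceCopied_Element expectations further down.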
+ auto new_lod = std::make_shared(); + new_lod->reserve(lod.size() - level); + auto start = lod.at(level)[elem_begin]; + auto end = lod.at(level)[elem_end]; + + for (auto it = lod.begin() + level; it != lod.end(); it++) { + auto it_begin = std::find(it->begin(), it->end(), start); + auto it_end = std::find(it_begin, it->end(), end); + PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info"); + PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info"); + new_lod->emplace_back(it_begin, it_end + 1); + if (!tensor_shared) { + // reset offset if tensor is copyed and sliced. + std::transform(new_lod->back().begin(), new_lod->back().end(), + new_lod->back().begin(), + [start](int v) { return v - start; }); + PADDLE_ENFORCE(new_lod->back().front() == 0, "error in slice LOD"); + } + } + return new_lod; +} + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/details/lod_tensor.h b/paddle/framework/details/lod_tensor.h new file mode 100644 index 0000000000..9a6a6cd2ea --- /dev/null +++ b/paddle/framework/details/lod_tensor.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include + +namespace paddle { +namespace framework { +namespace details { + +/* + * Slice levels from LOD. + * + * @lod: LOD to slice. + * @level_begin: level to begin slice. + * @level_end: level to end slice. + */ +std::shared_ptr SliceLOD(const LODTensor::LOD &lod, + size_t level_begin, size_t level_end); + +/* + * Slice elements from a level of LOD. + * + * @lod: LOD to slice. + * @level: which level to slice. + * @elem_begin: element's index to begin slice. + * @elem_end: element's index to end slice. + */ +std::shared_ptr SliceLOD(const LODTensor::LOD &lod, + size_t level, size_t elem_begin, + size_t elem_end, bool tensor_shared); +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc new file mode 100644 index 0000000000..70045dbf7a --- /dev/null +++ b/paddle/framework/lod_tensor.cc @@ -0,0 +1,51 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/framework/lod_tensor.h" + +#include + +namespace paddle { +namespace framework { + +LODTensor LODTensor::SliceShared(size_t level_begin, size_t level_end) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end); + // slice levels just need to update LOD info, each level will contains the + // whole tensor_, so no need to modify tensor_. + return LODTensor(tensor_, new_lod); +} + +LODTensor LODTensor::SliceShared(size_t level, size_t elem_begin, + size_t elem_end) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + PADDLE_ENFORCE(elem_begin < NumElements(level), + "element begin [%d] out of range [%d]", elem_begin, + NumElements(level)); + PADDLE_ENFORCE(elem_end < NumElements(level) + 1, + "element end [%d] out of range [%d]", elem_end, + NumElements(level)); + + auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end, + true /*tensor_shared*/); + + // slice elements just need to update LOD info, because offsets are not + // changed, so the original tensor_ can be reused. + return LODTensor(tensor_, new_lod); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h new file mode 100644 index 0000000000..4933479b10 --- /dev/null +++ b/paddle/framework/lod_tensor.h @@ -0,0 +1,145 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#if (!PADDLE_ONLY_CPU) +#include +#include +#endif + +#include "paddle/framework/ddim.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/enforce.h" + +namespace paddle { +namespace framework { + +/* + * LODTensor (Level of details Tensor) + * see https://en.wikipedia.org/wiki/Level_of_details for reference. + */ +class LODTensor { + public: +// Level save offsets of each unit. +#ifdef PADDLE_ONLY_CPU + using Level = std::vector; +#else + using Level = thrust::device_vector; +#endif + // LOD stores offsets of each level of units, the largest units level first, + // then the smaller units level. Each Level stores the offsets of units in + // Tesor. + typedef std::vector LOD; + + LODTensor() {} + LODTensor(const std::shared_ptr &tensor, + const std::shared_ptr &lod) { + Reset(tensor, lod); + } + + void Reset(const std::shared_ptr &tensor, + const std::shared_ptr &lod) { + tensor_ = tensor; + lod_start_pos_ = lod; + } + + /* + * Get a element from LOD. 
+ */ + size_t lod_element(size_t level, size_t elem) const { + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + PADDLE_ENFORCE(elem < NumElements(level), + "element begin [%d] out of range [%d]", elem, + NumElements(level)); + return (*lod_start_pos_)[level][elem]; + } + + /* + * Number of LODTensor's levels, each level has units of data, for example, + * in the sentence's view, article, paragraph, sentence are 3 levels. + */ + size_t NumLevels() const { + return lod_start_pos_ ? lod_start_pos_->size() : 0UL; + } + /* + * Number of elements in a level. + */ + size_t NumElements(size_t level = 0) const { + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + // the last offset is the end of last element + return lod_start_pos_->at(level).size() - 1; + } + + /* + * Slice of levels[level_begin:level_end], with tensor copied. + */ + template + LODTensor SliceCopied(size_t level_begin, size_t level_end, + const platform::Place &dst_place) const; + + /* + * Slice of levels[level_begin:level_end], with tensor shared. + */ + LODTensor SliceShared(size_t level_begin, size_t level_end) const; + + /* + * Slice of elements of a level, [elem_begin: elem_end], with tensor copied. + * @note: low performance in slice lod_start_pos_. + */ + template + LODTensor SliceCopied(size_t level, size_t elem_begin, size_t elem_end, + const platform::Place &dst_place) const; + + /* + * Slice of elements of a level, [elem_begin: elem_end], with tensor shared. + * @note: low performance in slice lod_start_pos_. + */ + LODTensor SliceShared(size_t level, size_t elem_begin, size_t elem_end) const; + + /* + * Copy other's lod_start_pos_, to share LOD info. + * @note: the LOD info should not be changed. + */ + void ShareLOD(const LODTensor &other) { + lod_start_pos_ = other.lod_start_pos_; + } + + /* + * Copy other's lod_start_pos_'s content, free to mutate. + */ + void CopyLOD(const LODTensor &other) { + lod_start_pos_ = std::make_shared(*other.lod_start_pos_); + } + /* + * Determine whether LODTensor has a valid LOD info. + */ + bool HasLOD() const { return bool(lod_start_pos_); } + LOD *lod() const { return lod_start_pos_.get(); } + + std::shared_ptr &tensor() { return tensor_; } + Tensor *raw_tensor() { return tensor_.get(); } + + private: + std::shared_ptr lod_start_pos_; + std::shared_ptr tensor_; +}; + +} // namespace framework +} // namespace paddle + +#include "paddle/framework/lod_tensor_impl.h" diff --git a/paddle/framework/lod_tensor_impl.h b/paddle/framework/lod_tensor_impl.h new file mode 100644 index 0000000000..0eb6469aea --- /dev/null +++ b/paddle/framework/lod_tensor_impl.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#pragma once + +#include "paddle/framework/details/lod_tensor.h" + +namespace paddle { +namespace framework { + +template +LODTensor LODTensor::SliceCopied(size_t level_begin, size_t level_end, + const platform::Place &dst_place) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end); + auto new_tensor = std::make_shared(); + new_tensor->CopyFrom(*tensor_, dst_place); + + return LODTensor(new_tensor, new_lod); +} + +template +LODTensor LODTensor::SliceCopied(size_t level, size_t elem_begin, + size_t elem_end, + const platform::Place &dst_place) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + PADDLE_ENFORCE(elem_begin < NumElements(level), + "element begin [%d] out of range [%d]", elem_begin, + NumElements(level)); + PADDLE_ENFORCE(elem_end < NumElements(level) + 1, + "element end [%d] out of range [%d]", elem_end, + NumElements(level)); + + auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end, + false /*tensor_shared*/); + + auto start_idx = new_lod->front().front(); + auto end_idx = new_lod->front().back() - 1 /*the next element's start*/; + auto sliced_tensor = tensor_->Slice(start_idx, end_idx); + auto new_tensor = std::make_shared(); + new_tensor->CopyFrom(sliced_tensor, dst_place); + + return LODTensor(new_tensor, new_lod); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc new file mode 100644 index 0000000000..511716375e --- /dev/null +++ b/paddle/framework/lod_tensor_test.cc @@ -0,0 +1,165 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "paddle/framework/lod_tensor.h" + +#include +#include +#include + +namespace paddle { +namespace framework { + +class LODTensorTester : public ::testing::Test { + public: + virtual void SetUp() override { + lod_tensor.reset(new LODTensor); + // tensor's batch_size: 30 + // 3 levels + // 0 10 20 + // 0 5 10 15 20 + // 0 2 5 7 10 12 15 20 + auto lod = std::make_shared(); + lod->push_back(std::vector{0, 10, 20}); + lod->push_back(std::vector{0, 5, 10, 15, 20}); + lod->push_back(std::vector{0, 2, 5, 7, 10, 12, 15, 17, 20}); + + auto tensor = std::make_shared(); + tensor->Resize({20 /*batch size*/, 128 /*dim*/}); + // malloc memory + tensor->mutable_data(place); + + lod_tensor->Reset(tensor, lod); + } + + protected: + std::unique_ptr lod_tensor; + platform::CPUPlace place; +}; + +TEST_F(LODTensorTester, NumLevels) { ASSERT_EQ(lod_tensor->NumLevels(), 3UL); } + +TEST_F(LODTensorTester, NumElements) { + ASSERT_EQ(lod_tensor->NumElements(0), 2UL); + ASSERT_EQ(lod_tensor->NumElements(1), 4UL); + ASSERT_EQ(lod_tensor->NumElements(2), 8UL); +} + +TEST_F(LODTensorTester, SliceShared_Level) { + // slice 1 level + for (size_t level = 0; level < 3UL; ++level) { + auto new_lod_tensor = lod_tensor->SliceShared(level, level + 1); + ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); + ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level)); + ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + } + // slice 2 level + for (size_t level = 0; level < 2UL; ++level) { + auto new_lod_tensor = lod_tensor->SliceShared(level, level + 2); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level)); + ASSERT_EQ(new_lod_tensor.NumElements(1), + lod_tensor->NumElements(level + 1)); + ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + } +} + +TEST_F(LODTensorTester, SliceCopied_Level) { + // slice 1 level + for (size_t level = 0; level < 3UL; ++level) { + auto new_lod_tensor = + lod_tensor->SliceCopied(level, level + 1, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); + ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level)); + // ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + // TODO(superjom) add tensor comparation here. + } + // slice 2 level + for (size_t level = 0; level < 2UL; ++level) { + auto new_lod_tensor = + lod_tensor->SliceCopied(level, level + 2, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level)); + ASSERT_EQ(new_lod_tensor.NumElements(1), + lod_tensor->NumElements(level + 1)); + // ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + // TODO(superjom) add tensor comparation here. 
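These tests all lean on one invariant of the LOD layout: a level holding N elements stores N + 1 offsets, so NumElements(level) is offsets.size() - 1. The SetUp() LOD checked standalone, as a sketch:

#include <cassert>
#include <vector>

int main() {
  std::vector<std::vector<int>> lod = {
      {0, 10, 20},                        // level 0: 2 elements
      {0, 5, 10, 15, 20},                 // level 1: 4 elements
      {0, 2, 5, 7, 10, 12, 15, 17, 20}};  // level 2: 8 elements
  for (const auto& level : lod) {
    assert(level.size() >= 2);  // at least one element per level
  }
  assert(lod[0].size() - 1 == 2);
  assert(lod[1].size() - 1 == 4);
  assert(lod[2].size() - 1 == 8);
  return 0;
}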
+ } +} + +TEST_F(LODTensorTester, SliceShared_Element) { + size_t level = 0; + auto new_lod_tensor = lod_tensor->SliceShared(level, 0, 2); + ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL); + ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); + + level = 1; + new_lod_tensor = lod_tensor->SliceShared(level, 0, 2); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); +} + +TEST_F(LODTensorTester, SliceCopied_Element) { + size_t level = 0; + auto new_lod_tensor = lod_tensor->SliceCopied(level, 0, 2, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL); + ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); + + level = 1; + new_lod_tensor = lod_tensor->SliceCopied(level, 0, 2, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); + + level = 1; + // LOD is + // 0 5 10 + // 0 2 5 7 10 + new_lod_tensor = lod_tensor->SliceCopied(level, 1, 3, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + + ASSERT_EQ(new_lod_tensor.lod_element(0, 0), 0UL); + ASSERT_EQ(new_lod_tensor.lod_element(0, 1), 5UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 0), 0UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 1), 2UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 2), 5UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 3), 7UL); + + // TODO(superjom) compare the content of these tensors +} + +TEST_F(LODTensorTester, ShareLOD) { + LODTensor new_lod_tensor; + new_lod_tensor.ShareLOD(*lod_tensor); + ASSERT_EQ(new_lod_tensor.lod(), lod_tensor->lod()); +} + +TEST_F(LODTensorTester, CopyLOD) { + LODTensor new_lod_tensor; + new_lod_tensor.CopyLOD(*lod_tensor); + ASSERT_NE(new_lod_tensor.lod(), lod_tensor->lod()); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index c44df05e4b..b57958591f 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -18,6 +18,8 @@ limitations under the License. 
*/ #include #include #include +#include + #include "paddle/framework/ddim.h" #include "paddle/memory/memory.h" #include "paddle/platform/device_context.h" diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 20276181b9..7db38d5cae 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -19,7 +19,7 @@ TEST(Tensor, Dims) { using namespace paddle::framework; using namespace paddle::platform; Tensor tt; - tt.Resize(make_ddim({2, 3, 4})); + tt.Resize({2, 3, 4}); DDim dims = tt.dims(); ASSERT_EQ(arity(dims), 3); for (int i = 0; i < 3; ++i) { From b008360b05cea60634a6afe07b7f2309fc6ea28e Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Wed, 9 Aug 2017 13:08:40 +0800 Subject: [PATCH 37/51] merge InferShapeContext and OperatorContext (#3347) * merge InferShapeContext and OperatorContext * OperatorBase& instead of OperatorBase* --- paddle/framework/operator.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index c324fa6702..ceef9f028b 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -120,10 +120,10 @@ class OperatorBase { std::shared_ptr> in_out_idxs_; }; -class OperatorContext { +class InferShapeContext { public: - OperatorContext(const OperatorBase* op, const Scope& scope) - : op_(*op), scope_(scope) {} + InferShapeContext(const OperatorBase& op, const Scope& scope) + : op_(op), scope_(scope) {} size_t InputSize() const { return op_.inputs_.size(); } @@ -234,12 +234,6 @@ class OperatorContext { const Scope& scope_; }; -class InferShapeContext : public OperatorContext { - public: - InferShapeContext(const OperatorBase* op, const Scope& scope) - : OperatorContext(op, scope) {} -}; - template struct EigenDeviceConverter; @@ -255,11 +249,11 @@ struct EigenDeviceConverter { }; #endif -class ExecutionContext : public OperatorContext { +class ExecutionContext : public InferShapeContext { public: - ExecutionContext(const OperatorBase* op, const Scope& scope, + ExecutionContext(const OperatorBase& op, const Scope& scope, const platform::DeviceContext* device_context) - : OperatorContext(op, scope), device_context_(device_context) {} + : InferShapeContext(op, scope), device_context_(device_context) {} template , OpKernelHash>; void InferShape(const Scope& scope) const override { - InferShape(InferShapeContext(this, scope)); + InferShape(InferShapeContext(*this, scope)); } void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const final { auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); - opKernel->Compute(ExecutionContext(this, scope, &dev_ctx)); + opKernel->Compute(ExecutionContext(*this, scope, &dev_ctx)); } static std::unordered_map& From a573dd4cc6f5a41ddbeec1be560d587f61029005 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 9 Aug 2017 13:21:12 +0800 Subject: [PATCH 38/51] Use ostream << operator to get to_string * Make `PADDLE_ENFORCE_EQ` supports custom class, like DDim --- paddle/platform/enforce.h | 7 ++--- paddle/platform/enforce_test.cc | 40 +++++++++++++++++++++++++++- paddle/string/CMakeLists.txt | 1 + paddle/string/to_string.h | 40 ++++++++++++++++++++++++++++ paddle/string/to_string_test.cc | 46 +++++++++++++++++++++++++++++++++ 5 files changed, 130 insertions(+), 4 deletions(-) create mode 100644 paddle/string/to_string.h create mode 100644 paddle/string/to_string_test.cc diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index d2adb997de..337a059fb1 
100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -15,11 +15,12 @@ limitations under the License. */ #pragma once #include -#include #include #include #include #include +#include "paddle/string/printf.h" +#include "paddle/string/to_string.h" #ifndef PADDLE_ONLY_CPU @@ -194,8 +195,8 @@ inline void throw_on_error(T e) { #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ PADDLE_ENFORCE(__VAL0 __CMP __VAL1, \ "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \ - #__VAL0, #__VAL1, std::to_string(__VAL0), \ - std::to_string(__VAL1), \ + #__VAL0, #__VAL1, paddle::string::to_string(__VAL0), \ + paddle::string::to_string(__VAL1), \ paddle::string::Sprintf("" __VA_ARGS__)); } // namespace platform diff --git a/paddle/platform/enforce_test.cc b/paddle/platform/enforce_test.cc index 5408fce558..80bdee3d9d 100644 --- a/paddle/platform/enforce_test.cc +++ b/paddle/platform/enforce_test.cc @@ -9,6 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include +#include #include #include "gtest/gtest.h" @@ -83,7 +85,7 @@ TEST(ENFORCE_NE, FAIL) { } catch (paddle::platform::EnforceNotMet error) { caught_exception = true; EXPECT_TRUE(HasPrefix(StringPiece(error.what()), - "enforce 1.0 != 1UL failed, 1.000000 == 1")) + "enforce 1.0 != 1UL failed, 1 == 1")) << error.what() << " does not have expected prefix"; } EXPECT_TRUE(caught_exception); @@ -176,3 +178,39 @@ TEST(ENFORCE_NOT_NULL, FAIL) { } EXPECT_TRUE(caught_exception); } + +struct Dims { + size_t dims_[4]; + + bool operator==(const Dims& o) const { + for (size_t i = 0; i < 4; ++i) { + if (dims_[i] != o.dims_[i]) return false; + } + return true; + } +}; + +std::ostream& operator<<(std::ostream& os, const Dims& d) { + for (size_t i = 0; i < 4; ++i) { + if (i == 0) { + os << "["; + } + os << d.dims_[i]; + if (i == 4 - 1) { + os << "]"; + } else { + os << ", "; + } + } + return os; +} + +TEST(ENFORCE_USER_DEFINED_CLASS, EQ) { + Dims a{{1, 2, 3, 4}}, b{{1, 2, 3, 4}}; + PADDLE_ENFORCE_EQ(a, b); +} + +TEST(ENFORCE_USER_DEFINED_CLASS, NE) { + Dims a{{1, 2, 3, 4}}, b{{5, 6, 7, 8}}; + ASSERT_THROW(PADDLE_ENFORCE_EQ(a, b), paddle::platform::EnforceNotMet); +} \ No newline at end of file diff --git a/paddle/string/CMakeLists.txt b/paddle/string/CMakeLists.txt index 5becf62672..60667b7287 100644 --- a/paddle/string/CMakeLists.txt +++ b/paddle/string/CMakeLists.txt @@ -2,3 +2,4 @@ cc_library(stringpiece SRCS piece.cc) cc_test(stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags) cc_test(stringprintf_test SRCS printf_test.cc DEPS glog gflags) +cc_test(to_string_test SRCS to_string_test.cc) diff --git a/paddle/string/to_string.h b/paddle/string/to_string.h new file mode 100644 index 0000000000..4f478b6a36 --- /dev/null +++ b/paddle/string/to_string.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#pragma once +#include +#include + +namespace paddle { +namespace string { +template +inline std::string to_string(T v) { + std::ostringstream sout; + sout << v; + return sout.str(); +} + +// Faster std::string/const char* type +template <> +inline std::string to_string(std::string v) { + return v; +} + +template <> +inline std::string to_string(const char* v) { + return std::string(v); +} + +} // namespace string +} // namespace paddle diff --git a/paddle/string/to_string_test.cc b/paddle/string/to_string_test.cc new file mode 100644 index 0000000000..0ef06eac24 --- /dev/null +++ b/paddle/string/to_string_test.cc @@ -0,0 +1,46 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/string/to_string.h" +#include + +constexpr char OUT_STR[] = "User Defined Output"; +class UserDefinedClass { +public: +}; + +std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) { + s << OUT_STR; + return s; +} + +TEST(to_string, normal) { + using namespace paddle::string; + ASSERT_EQ(std::to_string(10), to_string(10)); + ASSERT_EQ("abc", to_string("abc")); + + auto std_to_string = std::to_string(1.2); + auto my_to_string = to_string(1.2); + + // std::to_string might fill zero after float value, like 1.2000 + for (size_t i = 0; i < my_to_string.size(); ++i) { + ASSERT_EQ(my_to_string[i], std_to_string[i]); + } +} + +TEST(to_string, user_defined) { + using namespace paddle::string; + UserDefinedClass instance; + ASSERT_EQ(OUT_STR, to_string(instance)); +} \ No newline at end of file From e67a1c928d6ee3c0588d6b31c510c3e41ef83b38 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 9 Aug 2017 13:59:07 +0800 Subject: [PATCH 39/51] Make android compile pass --- paddle/string/to_string_test.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/paddle/string/to_string_test.cc b/paddle/string/to_string_test.cc index 0ef06eac24..57b4010626 100644 --- a/paddle/string/to_string_test.cc +++ b/paddle/string/to_string_test.cc @@ -25,6 +25,11 @@ std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) { return s; } +// android macro comes from +// https://stackoverflow.com/questions/15328751/android-macro-suddenly-not-defined +#if !defined(ANDROID) && !defined(__ANDROID__) +// In android, std::to_string is not defined. 
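The ostringstream-based to_string above exists because std::to_string only covers arithmetic types (and is missing from older Android NDKs, as the comment below notes), while routing through operator<< extends to any user type with a stream inserter. The idiom in isolation, with a hypothetical Dims3 standing in for user-defined classes:

#include <ostream>
#include <sstream>
#include <string>

template <typename T>
std::string ToString(const T& v) {
  std::ostringstream out;
  out << v;  // picks up user-provided operator<< overloads too
  return out.str();
}

struct Dims3 {
  int d[3];
};

std::ostream& operator<<(std::ostream& os, const Dims3& s) {
  return os << "[" << s.d[0] << ", " << s.d[1] << ", " << s.d[2] << "]";
}
// ToString(Dims3{{1, 2, 3}}) yields "[1, 2, 3]";
// ToString(1.2) yields "1.2" (iostream formatting, not "1.200000").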
+// https://stackoverflow.com/questions/22774009/android-ndk-stdto-string-support TEST(to_string, normal) { using namespace paddle::string; ASSERT_EQ(std::to_string(10), to_string(10)); @@ -38,6 +43,7 @@ TEST(to_string, normal) { ASSERT_EQ(my_to_string[i], std_to_string[i]); } } +#endif TEST(to_string, user_defined) { using namespace paddle::string; From d8a3291d87f20b6e4973bd9735e7a761752a10f1 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 9 Aug 2017 14:14:47 +0800 Subject: [PATCH 40/51] Refine unit-test for to_string --- paddle/string/to_string_test.cc | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/paddle/string/to_string_test.cc b/paddle/string/to_string_test.cc index 57b4010626..4a075751ac 100644 --- a/paddle/string/to_string_test.cc +++ b/paddle/string/to_string_test.cc @@ -25,25 +25,12 @@ std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) { return s; } -// android macro comes from -// https://stackoverflow.com/questions/15328751/android-macro-suddenly-not-defined -#if !defined(ANDROID) && !defined(__ANDROID__) -// In android, std::to_string is not defined. -// https://stackoverflow.com/questions/22774009/android-ndk-stdto-string-support TEST(to_string, normal) { using namespace paddle::string; - ASSERT_EQ(std::to_string(10), to_string(10)); + ASSERT_EQ("10", to_string(10)); ASSERT_EQ("abc", to_string("abc")); - - auto std_to_string = std::to_string(1.2); - auto my_to_string = to_string(1.2); - - // std::to_string might fill zero after float value, like 1.2000 - for (size_t i = 0; i < my_to_string.size(); ++i) { - ASSERT_EQ(my_to_string[i], std_to_string[i]); - } + ASSERT_EQ("1.2", to_string(1.2)); } -#endif TEST(to_string, user_defined) { using namespace paddle::string; From 2d35c7008117cc2ec7c1a079947fa4537d6d2f58 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 9 Aug 2017 14:29:24 +0800 Subject: [PATCH 41/51] Fit google name style --- paddle/string/to_string_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/string/to_string_test.cc b/paddle/string/to_string_test.cc index 4a075751ac..5ff1b007f1 100644 --- a/paddle/string/to_string_test.cc +++ b/paddle/string/to_string_test.cc @@ -15,13 +15,13 @@ #include "paddle/string/to_string.h" #include -constexpr char OUT_STR[] = "User Defined Output"; +constexpr char kOutputString[] = "User Defined Output"; class UserDefinedClass { public: }; std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) { - s << OUT_STR; + s << kOutputString; return s; } @@ -35,5 +35,5 @@ TEST(to_string, normal) { TEST(to_string, user_defined) { using namespace paddle::string; UserDefinedClass instance; - ASSERT_EQ(OUT_STR, to_string(instance)); + ASSERT_EQ(kOutputString, to_string(instance)); } \ No newline at end of file From df4fe671fe59863b5cbb3b595da544016b678199 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 9 Aug 2017 14:47:56 +0800 Subject: [PATCH 42/51] "remove attribute" --- paddle/framework/operator.cc | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 511323b6f5..d9a013b883 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/framework/operator.h" #include -#include + +#include "paddle/framework/operator.h" namespace paddle { namespace framework { @@ -103,16 +103,6 @@ std::string OperatorBase::DebugString() const { ss << ", "; } } - ss << "), "; - ss << "Attrs:("; - size_t i = 0; - for (auto& attr : attrs_) { - ss << attr.first; - if (i != attrs_.size() - 1) { - ss << ", "; - } - i++; - } ss << ")."; return ss.str(); } From 6bac3e17b5b1f9e6a0ebb34ff43e959a971ef111 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 9 Aug 2017 15:01:37 +0800 Subject: [PATCH 43/51] "remove unused test net modified" --- paddle/operators/gaussian_random_op.cc | 7 ++++--- paddle/operators/gaussian_random_op.cu | 5 ++--- .../v2/framework/tests/test_gaussian_random_op.py | 4 +++- python/paddle/v2/framework/tests/test_net.py | 12 ++++++------ 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index b0b68ff36d..ef417ae2f0 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -22,8 +22,8 @@ template class GaussianRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - T mean = static_cast(context.op_.GetAttr("mean")); - T std = static_cast(context.op_.GetAttr("std")); + float mean = context.op_.GetAttr("mean"); + float std = context.op_.GetAttr("std"); auto* tensor = context.Output(0); T* data = tensor->mutable_data(context.GetPlace()); @@ -35,7 +35,8 @@ class GaussianRandomKernel : public framework::OpKernel { } std::mt19937 g(seed); std::normal_distribution distribution(mean, std); - for (int i = 0; i < framework::product(tensor->dims()); ++i) { + ssize_t size = framework::product(tensor->dims()); + for (int i = 0; i < size; ++i) { data[i] = distribution(g); } } diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index 164753f946..54e4ae5d2b 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -26,8 +26,8 @@ template class GaussianRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - T mean = static_cast(context.op_.GetAttr("mean")); - T std = static_cast(context.op_.GetAttr("std")); + float mean = context.op_.GetAttr("mean"); + float std = context.op_.GetAttr("std"); auto* tensor = context.Output(0); T* data = tensor->mutable_data(context.GetPlace()); @@ -40,7 +40,6 @@ class GaussianRandomKernel : public framework::OpKernel { &g, CURAND_RNG_PSEUDO_DEFAULT)); PADDLE_ENFORCE( platform::dynload::curandSetPseudoRandomGeneratorSeed(g, seed)); - // auto g = const_cast(ctx)->RandGenerator(); curandGenerateNormal(g, data, framework::product(tensor->dims()), mean, std); } diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py index 0ff8c89a14..20c68007b5 100644 --- a/python/paddle/v2/framework/tests/test_gaussian_random_op.py +++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py @@ -14,13 +14,15 @@ class GaussianRandomTest(unittest.TestCase): def test_gaussian_random(self, place): scope = core.Scope() scope.new_var("Out").get_tensor() + op = Operator( "gaussian_random", Out="Out", dims=[1000, 784], mean=.0, std=1., - seed=0) + seed=10) + op.infer_shape(scope) context = core.DeviceContext.create(place) op.run(scope, context) diff --git 
a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py index 7df9b997b1..b30896553d 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/framework/tests/test_net.py @@ -16,13 +16,13 @@ class TestNet(unittest.TestCase): net.complete_add_op(True) expected = ''' - Op(plain_net), inputs:(@EMPTY@, X, Y, w), outputs:(@TEMP@fc@0, Out, fc.out). - Op(add_two), inputs:(X, Y), outputs:(Out). - Op(plain_net), inputs:(@EMPTY@, X, w), outputs:(@TEMP@fc@0, fc.out). +Op(plain_net), inputs:(@EMPTY@, X, Y, w), outputs:(@TEMP@fc@0, Out, fc.out). + Op(add_two), inputs:(X, Y), outputs:(Out). + Op(plain_net), inputs:(@EMPTY@, X, w), outputs:(@TEMP@fc@0, fc.out). Op(fc), inputs:(X, w, @EMPTY@), outputs:(fc.out, @TEMP@fc@0). - Op(mul), inputs:(X, w), outputs:(@TEMP@fc@0). - Op(sigmoid), inputs:(@TEMP@fc@0), outputs:(fc.out). - ''' + Op(mul), inputs:(X, w), outputs:(@TEMP@fc@0). + Op(sigmoid), inputs:(@TEMP@fc@0), outputs:(fc.out). +''' self.assertEqual(expected, "\n" + str(net)) From b228b463fa6f1a4cf1f102dcea1eff61f16cc698 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 9 Aug 2017 15:09:57 +0800 Subject: [PATCH 44/51] Make const variables in operator.h fit google style * No POD instance is forbidden in global scope. See https://google.github.io/styleguide/cppguide.html#Static_and_Global_Variables --- paddle/framework/backward.cc | 6 ++-- paddle/framework/backward_test.cc | 31 +++++++++--------- paddle/framework/grad_op_builder_test.cc | 41 +++++++++++------------- paddle/framework/operator.h | 8 ++--- paddle/operators/mean_op.cc | 2 +- paddle/operators/mean_op.h | 4 +-- 6 files changed, 44 insertions(+), 48 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 47983110fa..be6656792f 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -133,8 +133,8 @@ std::shared_ptr BackwardRecursive( std::shared_ptr grad_op = OpRegistry::CreateGradOp(forwardOp); for (std::string& grad_input : grad_op->inputs_) { if (no_grad_names.count(grad_input)) { - std::string prefix = - grad_input.substr(0, grad_input.size() - kGradVarSuffix.size()); + std::string prefix = grad_input.substr( + 0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char)); grad_input = prefix + kZeroVarSuffix; // If part of input gradient of that operator is not calculated, fill @@ -167,7 +167,7 @@ std::shared_ptr Backward( std::unordered_set no_grad_names; no_grad_names.reserve(no_grad_vars.size()); - no_grad_names.insert(kEmptyVarName + kGradVarSuffix); + no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix); for (auto& name : no_grad_vars) { no_grad_names.insert(name + kGradVarSuffix); diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 6d5835bd22..1677a3ed4c 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -171,10 +171,10 @@ TEST(Backward, simple_op_grad) { ASSERT_EQ(4UL, gop->inputs_.size()); ASSERT_EQ(f::kEmptyVarName, gop->inputs_[0]); ASSERT_EQ("rowwise_add_grad", gop->type_); - ASSERT_EQ("X" + f::kGradVarSuffix, gop->outputs_[0]); - ASSERT_EQ("b" + f::kGradVarSuffix, gop->outputs_[1]); + ASSERT_EQ(f::GradVarName("X"), gop->outputs_[0]); + ASSERT_EQ(f::GradVarName("b"), gop->outputs_[1]); - ASSERT_EQ("X" + f::kGradVarSuffix, gop->Output("X" + f::kGradVarSuffix)); + ASSERT_EQ(f::GradVarName("X"), gop->Output(f::GradVarName("X"))); } TEST(Backward, simple_op_not_need_grad) { @@ -182,7 +182,7 @@ TEST(Backward, 
simple_op_not_need_grad) { ASSERT_NE(fwd, nullptr); auto gop = f::Backward(*fwd, {"X"}); ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(), - "X" + f::kGradVarSuffix), + f::GradVarName("X")), gop->outputs_.end()); auto no_input_gop = f::Backward(*fwd, {"X", "b"}); @@ -250,18 +250,18 @@ TEST(Backward, net_input_of_network_not_need_grad) { all_output.erase(f::kEmptyVarName); for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) { - ASSERT_NE(all_output.find(out + f::kGradVarSuffix), all_output.end()); + ASSERT_NE(all_output.find(f::GradVarName(out)), all_output.end()); } // Not Generated X - ASSERT_EQ(all_output.find("X" + f::kGradVarSuffix), all_output.end()); + ASSERT_EQ(all_output.find(f::GradVarName("X")), all_output.end()); ASSERT_EQ(2UL, bwd_net->ops_.size()); ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp()); auto first_fc_grad = static_cast(bwd_net->ops_[1].get()); ASSERT_EQ(3UL, first_fc_grad->ops_.size()); ASSERT_EQ(f::kEmptyVarName, - first_fc_grad->ops_[2]->Output("A" + f::kGradVarSuffix)); + first_fc_grad->ops_[2]->Output(f::GradVarName("A"))); } TEST(Backward, net_shared_weight) { @@ -313,15 +313,15 @@ TEST(Backward, op_part_of_output_are_not_need) { ASSERT_EQ(1UL, fill_zero.inputs_.size()); ASSERT_EQ("Z", fill_zero.inputs_[0]); ASSERT_EQ(1UL, fill_zero.outputs_.size()); - ASSERT_EQ("Z" + f::kZeroVarSuffix, fill_zero.outputs_[0]); + ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.outputs_[0]); auto &d_many_out = *net->ops_[1]; ASSERT_EQ("many_output_op_grad", d_many_out.type_); ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.inputs_.size()); // I/O/OG - ASSERT_EQ("Z" + f::kZeroVarSuffix, d_many_out.Input("z" + f::kGradVarSuffix)); - ASSERT_EQ("Y" + f::kGradVarSuffix, d_many_out.Input("y" + f::kGradVarSuffix)); - ASSERT_EQ("X" + f::kGradVarSuffix, - d_many_out.Output("x" + f::kGradVarSuffix)); + ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, + d_many_out.Input(f::GradVarName("z"))); + ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y"))); + ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x"))); } TEST(Backward, op_part_of_input_are_not_need) { @@ -331,10 +331,9 @@ TEST(Backward, op_part_of_input_are_not_need) { ASSERT_EQ(grad_mul.type_, "mul_grad"); ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL); ASSERT_EQ(grad_mul.outputs_.size(), 2UL); - ASSERT_EQ(grad_mul.Output("A" + f::kGradVarSuffix), f::kEmptyVarName); - ASSERT_EQ(grad_mul.Output("B" + f::kGradVarSuffix), "b" + f::kGradVarSuffix); - ASSERT_EQ(grad_mul.Input("Out" + f::kGradVarSuffix), - "out" + f::kGradVarSuffix); + ASSERT_EQ(grad_mul.Output(f::GradVarName("A")), f::kEmptyVarName); + ASSERT_EQ(grad_mul.Output(f::GradVarName("B")), f::GradVarName("b")); + ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out")); ASSERT_EQ(grad_mul.Input("A"), "a"); ASSERT_EQ(grad_mul.Input("B"), "b"); ASSERT_EQ(grad_mul.Input("Out"), "out"); diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index cf7143eba4..f1ebbae52f 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -83,21 +83,19 @@ TEST(GradOpBuilder, MutiInOut) { EXPECT_EQ(grad_test_op->Input("Out1"), "out1"); EXPECT_EQ(grad_test_op->Inputs("Out2_mult"), std::vector({"out2_1", "out2_2"})); - EXPECT_EQ(grad_test_op->Input("Out1" + f::kGradVarSuffix), - "out1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Inputs("Out2_mult" + f::kGradVarSuffix), + EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out1")), + f::GradVarName("out1")); + 
EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out2_mult")), std::vector( - {"out2_1" + f::kGradVarSuffix, "out2_2" + f::kGradVarSuffix})); + {f::GradVarName("out2_1"), f::GradVarName("out2_2")})); ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); - EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), - "in1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix), - std::vector({"in2_1" + f::kGradVarSuffix, - "in2_2" + f::kGradVarSuffix, - "in2_3" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Output("In3" + f::kGradVarSuffix), - "in3" + f::kGradVarSuffix); + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1")); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")), + std::vector({f::GradVarName("in2_1"), + f::GradVarName("in2_2"), + f::GradVarName("in2_3")})); + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In3")), f::GradVarName("in3")); } TEST(GradOpBuilder, IOIgnoredInGradient) { @@ -119,19 +117,18 @@ TEST(GradOpBuilder, IOIgnoredInGradient) { EXPECT_EQ(grad_test_op->Inputs("Out1_mult"), std::vector({"out1_1", "out1_2"})); EXPECT_EQ(grad_test_op->Input("Out2"), f::kEmptyVarName); - EXPECT_EQ(grad_test_op->Inputs("Out1_mult" + f::kGradVarSuffix), + EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out1_mult")), std::vector( - {"out1_1" + f::kGradVarSuffix, "out1_2" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Input("Out2" + f::kGradVarSuffix), - "out2" + f::kGradVarSuffix); + {f::GradVarName("out1_1"), f::GradVarName("out1_2")})); + EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out2")), + f::GradVarName("out2")); ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); - EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), - "in1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix), + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1")); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")), std::vector( - {"in2_1" + f::kGradVarSuffix, "in2_2" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Outputs("In3_mult" + f::kGradVarSuffix), + {f::GradVarName("in2_1"), f::GradVarName("in2_2")})); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In3_mult")), std::vector( - {"in3_1" + f::kGradVarSuffix, "in3_2" + f::kGradVarSuffix})); + {f::GradVarName("in3_1"), f::GradVarName("in3_2")})); } diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index ceef9f028b..8949baf60e 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -33,19 +33,19 @@ namespace paddle { namespace framework { /// If a variable is a empty variable, that name will be used. -const std::string kEmptyVarName = "@EMPTY@"; +constexpr char kEmptyVarName[] = "@EMPTY@"; /// If a variable is a temporary variable, that name will be set in Python, /// but it will be convert to a unique name in scope after OpCreator. -const std::string kTempVarName = "@TEMP@"; +constexpr char kTempVarName[] = "@TEMP@"; /// If a variable's name has a certain suffix, it means that the /// variable is the gradient of another varibale. /// e.g. Variable "x@GRAD" is the gradient of varibale "x". -const std::string kGradVarSuffix = "@GRAD"; +constexpr char kGradVarSuffix[] = "@GRAD"; /// Variables with this suffix are supposed to be filled up with zeros. 
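// [Editor's note] This hunk replaces namespace-scope std::string constants
// with constexpr char arrays. The lines below are a minimal illustrative
// sketch, not part of the patch, and kSampleSuffix is a hypothetical name:
// a global std::string runs a constructor before main() and is exposed to
// static-initialization-order problems, whereas a constexpr char array is
// a POD baked into the binary. One side effect matters for the backward.cc
// hunks in this same patch: sizeof of a char array counts the trailing '\0'.
constexpr char kSampleSuffix[] = "@GRAD";  // fine: POD, no runtime init
static_assert(sizeof(kSampleSuffix) == 6,
              "five characters plus the terminating null");
// const std::string kBadSuffix = "@GRAD";  // disallowed: non-POD global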
-const std::string kZeroVarSuffix = "@ZERO"; +constexpr char kZeroVarSuffix[] = "@ZERO"; inline std::string GradVarName(const std::string& var_name) { return var_name + kGradVarSuffix; diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index 997b0c514e..2ea049cb36 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -41,7 +41,7 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker { class MeanGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output("X" + framework::kGradVarSuffix) + ctx.Output(framework::GradVarName("X")) ->Resize(ctx.Input("X")->dims()); } }; diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index f3db0a29bb..e8595a14fa 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -48,10 +48,10 @@ template class MeanGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto OG = context.Input("Out" + framework::kGradVarSuffix); + auto OG = context.Input(framework::GradVarName("Out")); PADDLE_ENFORCE(framework::product(OG->dims()) == 1, "Mean Gradient should be scalar"); - auto IG = context.Output("X" + framework::kGradVarSuffix); + auto IG = context.Output(framework::GradVarName("X")); IG->mutable_data(context.GetPlace()); T ig_size = (T)framework::product(IG->dims()); From 6c7c4333f83b43de3c4cd6813cf6433bb563b56f Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 9 Aug 2017 16:05:22 +0800 Subject: [PATCH 45/51] Fix TravisCI test --- paddle/framework/backward.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index be6656792f..437a44a8aa 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -133,8 +133,9 @@ std::shared_ptr BackwardRecursive( std::shared_ptr grad_op = OpRegistry::CreateGradOp(forwardOp); for (std::string& grad_input : grad_op->inputs_) { if (no_grad_names.count(grad_input)) { + // +1 for \0 std::string prefix = grad_input.substr( 0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); grad_input = prefix + kZeroVarSuffix; // If part of input gradient of that operator is not calculated, fill From bbd7378b4386623b1946a7e5ae82be4cfb2f01e2 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 9 Aug 2017 17:08:56 +0800 Subject: [PATCH 46/51] "ci job failed weirdly. restart ci job."
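[Editor's note: a quick check of the "+1 for \0" fix in PATCH 45 above. For a char array, sizeof counts the trailing '\0', so sizeof(kGradVarSuffix) is 6 while the "@GRAD" suffix is only 5 characters long; PATCH 44 therefore stripped one character too many, and the "+ 1" restores it. A self-contained sketch, assuming only the constants shown in these patches:]

#include <cassert>
#include <string>

constexpr char kGradVarSuffix[] = "@GRAD";

int main() {
  std::string grad_input = "x@GRAD";  // the gradient of variable "x"
  // size (6) - sizeof(kGradVarSuffix) (6, with '\0') + 1 = 1: exactly "x".
  std::string prefix = grad_input.substr(
      0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
  assert(prefix == "x");  // without the + 1 the prefix would be empty
  return 0;
}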
--- .../v2/framework/tests/test_gaussian_random_op.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py index 20c68007b5..f95ed70b58 100644 --- a/python/paddle/v2/framework/tests/test_gaussian_random_op.py +++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py @@ -6,12 +6,13 @@ import numpy class GaussianRandomTest(unittest.TestCase): def test_cpu(self): - self.test_gaussian_random(place=core.CPUPlace()) + self.gaussian_random_test(place=core.CPUPlace()) def test_gpu(self): - self.test_gaussian_random(place=core.GPUPlace(0)) + if core.is_compile_gpu(): + self.gaussian_random_test(place=core.GPUPlace(0)) - def test_gaussian_random(self, place): + def gaussian_random_test(self, place): scope = core.Scope() scope.new_var("Out").get_tensor() @@ -27,8 +28,8 @@ class GaussianRandomTest(unittest.TestCase): context = core.DeviceContext.create(place) op.run(scope, context) tensor = numpy.array(scope.find_var("Out").get_tensor()) - self.assertAlmostEqual(numpy.mean(tensor), .0, places=3) - self.assertAlmostEqual(numpy.std(tensor), 1., places=3) + self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1) + self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1) if __name__ == '__main__': From c957445c72fd8f2c0354d8b430ef37f47ac3bc73 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 9 Aug 2017 17:51:21 +0800 Subject: [PATCH 47/51] A better error message for gradient checker * Give which parameter, which element are wrong. And what max_diff is. --- paddle/framework/pybind.cc | 9 +++- .../v2/framework/tests/gradient_checker.py | 41 +++++++++++-------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 915ffb1c00..9139a496ec 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -22,6 +22,7 @@ limitations under the License. */ #include "paddle/operators/net_op.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" +#include "paddle/string/to_string.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" @@ -205,9 +206,13 @@ All parameter, weight, gradient are variables in Paddle. 
}); // clang-format on - py::class_(m, "GPUPlace").def(py::init()); + py::class_(m, "GPUPlace") + .def(py::init()) + .def("__str__", string::to_string); - py::class_(m, "CPUPlace").def(py::init<>()); + py::class_(m, "CPUPlace") + .def(py::init<>()) + .def("__str__", string::to_string); py::class_> operator_base( m, "Operator"); diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index b73c4869d1..7c4eda5f30 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -92,15 +92,26 @@ def get_numeric_gradient(op, class GradientChecker(unittest.TestCase): - def __is_close(self, numeric_grads, scope, max_relative_error): + def __is_close(self, numeric_grads, scope, max_relative_error, msg_prefix): for name in numeric_grads: - op_grad = numpy.array( - scope.find_var(grad_var_name(name)).get_tensor()) - is_close = numpy.allclose( - numeric_grads[name], op_grad, rtol=max_relative_error, atol=100) - if not is_close: - return False - return True + b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) + a = numeric_grads[name] + + abs_a = numpy.abs(a) + # if abs_a is nearly zero, then use abs error for a, not relative + # error. + abs_a[abs_a < 1e-3] = 1 + + diff_mat = numpy.abs(a - b) / abs_a + max_diff = numpy.max(diff_mat) + + def err_msg(): + offset = numpy.argmax(diff_mat > max_relative_error) + return "%s Variable %s max gradient diff %f over limit %f, the first " \ + "error element is %d" % ( + msg_prefix, name, max_diff, max_relative_error, offset) + + self.assertLessEqual(max_diff, max_relative_error, err_msg()) def check_grad(self, forward_op, @@ -145,7 +156,8 @@ class GradientChecker(unittest.TestCase): # get numeric gradient for check_name in inputs_to_check: numeric_grad[check_name] = \ - get_numeric_gradient(forward_op, input_vars, output_name, check_name) + get_numeric_gradient(forward_op, input_vars, output_name, + check_name) # get operator gradient according to different device for place in places: @@ -187,15 +199,8 @@ class GradientChecker(unittest.TestCase): backward_op.infer_shape(scope) backward_op.run(scope, ctx) - if isinstance(place, core.CPUPlace): - msg = "CPU kernel gradient is not close to numeric gradient" - else: - if isinstance(place, core.GPUPlace): - msg = "GPU kernel gradient is not close to numeric gradient" - else: - raise ValueError("unknown place " + type(place)) - self.assertTrue( - self.__is_close(numeric_grad, scope, max_relative_error), msg) + self.__is_close(numeric_grad, scope, max_relative_error, + "Gradient Check On %s" % str(place)) if __name__ == '__main__': From f0a85b08053440b9a49346f6d07cc106472c5c33 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 9 Aug 2017 18:03:39 +0800 Subject: [PATCH 48/51] Rename __is_close -> assert_is_close() --- python/paddle/v2/framework/tests/gradient_checker.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 7c4eda5f30..aacc5e88fe 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -92,7 +92,8 @@ def get_numeric_gradient(op, class GradientChecker(unittest.TestCase): - def __is_close(self, numeric_grads, scope, max_relative_error, msg_prefix): + def assert_is_close(self, numeric_grads, scope, max_relative_error, + msg_prefix): for name in numeric_grads: b = 
numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) a = numeric_grads[name] @@ -199,8 +200,8 @@ class GradientChecker(unittest.TestCase): backward_op.infer_shape(scope) backward_op.run(scope, ctx) - self.__is_close(numeric_grad, scope, max_relative_error, - "Gradient Check On %s" % str(place)) + self.assert_is_close(numeric_grad, scope, max_relative_error, + "Gradient Check On %s" % str(place)) if __name__ == '__main__': From 840d0c74025306985a814c1480851f69923b580a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 9 Aug 2017 18:11:21 +0800 Subject: [PATCH 49/51] Remove unnecessary C++ operator test They are tested in Python --- paddle/operators/CMakeLists.txt | 3 --- paddle/operators/add_op_test.cc | 28 ---------------------------- paddle/operators/mean_op_test.cc | 25 ------------------------- paddle/operators/sgd_op_test.cc | 22 ---------------------- 4 files changed, 78 deletions(-) delete mode 100644 paddle/operators/add_op_test.cc delete mode 100644 paddle/operators/mean_op_test.cc delete mode 100644 paddle/operators/sgd_op_test.cc diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 9e4026d1c6..af22229978 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -45,10 +45,8 @@ cc_library(net_op SRCS net_op.cc DEPS op_registry) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) op_library(add_op SRCS add_op.cc add_op.cu) -cc_test(add_op_test SRCS add_op_test.cc DEPS add_op) op_library(mean_op SRCS mean_op.cc mean_op.cu) -cc_test(mean_op_test SRCS mean_op_test.cc DEPS mean_op) op_library(mul_op SRCS mul_op.cc mul_op.cu) op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) @@ -59,7 +57,6 @@ op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu) op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) -cc_test(sgd_op_test SRCS sgd_op_test.cc DEPS sgd_op) op_library(fc_op SRCS fc_op.cc diff --git a/paddle/operators/add_op_test.cc b/paddle/operators/add_op_test.cc deleted file mode 100644 index bf529defb2..0000000000 --- a/paddle/operators/add_op_test.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#define private public -#include "paddle/framework/op_registry.h" - -USE_OP(add_two); - -TEST(AddOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("add_two"); - ASSERT_NE(it, protos.end()); - auto& op_creators = paddle::framework::OpRegistry::op_creators(); - auto it1 = op_creators.find("add_two_grad"); - ASSERT_NE(it1, op_creators.end()); -} diff --git a/paddle/operators/mean_op_test.cc b/paddle/operators/mean_op_test.cc deleted file mode 100644 index 375dcd50e1..0000000000 --- a/paddle/operators/mean_op_test.cc +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
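[Editor's note: the tolerance rule that assert_is_close applies in gradient_checker.py above, restated as a small self-contained sketch. It is rendered in C++ only to keep one language for the notes in this series; MaxRelativeDiff and the sample data are hypothetical. Elements whose reference value is nearly zero fall back to absolute error, so a tiny denominator cannot inflate the relative diff.]

#include <algorithm>
#include <cmath>
#include <cstdio>

// Mirrors diff_mat = |a - b| / |a|, with |a| < 1e-3 replaced by 1
// (numeric gradient a as reference, operator gradient b as candidate).
double MaxRelativeDiff(const double* a, const double* b, int n) {
  double worst = 0.0;
  for (int i = 0; i < n; ++i) {
    const double denom = std::abs(a[i]) < 1e-3 ? 1.0 : std::abs(a[i]);
    worst = std::max(worst, std::abs(a[i] - b[i]) / denom);
  }
  return worst;
}

int main() {
  const double numeric[] = {0.5, 1e-6};  // second slot is nearly zero
  const double actual[] = {0.501, 2e-6};
  std::printf("max diff = %g\n", MaxRelativeDiff(numeric, actual, 2));
  return 0;  // prints "max diff = 0.002"
}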
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include - -USE_OP(mean); - -TEST(MeanOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("mean"); - ASSERT_NE(it, protos.end()); -} diff --git a/paddle/operators/sgd_op_test.cc b/paddle/operators/sgd_op_test.cc deleted file mode 100644 index 75137259f5..0000000000 --- a/paddle/operators/sgd_op_test.cc +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -USE_OP(sgd); -TEST(SGDOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("sgd"); - ASSERT_NE(it, protos.end()); -} From f702e7977ddf571c7d23b8a3b26dc50e4731857b Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Wed, 9 Aug 2017 20:01:00 +0800 Subject: [PATCH 50/51] "relaunch ci" --- python/paddle/v2/framework/tests/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 785a589c24..f6850e0651 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -21,7 +21,6 @@ py_test(gradient_checker SRCS gradient_checker.py) py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) -py_test(test_op_creation_methods SRCS test_op_creation_methods.py) py_test(test_operator SRCS test_operator.py) From d4e4cebf5f95c0edd1788d81780491cd90e18236 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Wed, 9 Aug 2017 11:42:40 -0700 Subject: [PATCH 51/51] fix all coding-style problems --- paddle/operators/gather.h | 20 +++----------------- paddle/operators/gather_test.cc | 2 -- 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h index 8b02156545..0c73717d38 100644 --- a/paddle/operators/gather.h +++ b/paddle/operators/gather.h @@ -20,13 +20,10 @@ limitations under the License.
*/ #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" -using paddle::framework::Tensor; -using paddle::framework::DDim; - namespace paddle { namespace operators { -/* Implementation of CPU copy */ +// Implementation of CPU copy template void CPUGather(const T* params, const int* indices, const int slice_size, const int index_size, T* output) { @@ -34,15 +31,11 @@ void CPUGather(const T* params, const int* indices, const int slice_size, for (size_t i = 0; i < index_size; ++i) { int index_ = indices[i]; - // copy src[index_] to output[i] memcpy(output + i * slice_size, params + index_ * slice_size, slice_bytes); } } -/* Implementation of GPU copy: - I suppose the GPUDevice& d, contains gpu_id and thread_id - d = cuda_stream(gpu_id_, stream_id_); -*/ +// Implementation of GPU copy: template void GPUGather(const T* src, const int* index, const int slice_size, const int index_size, T* output); @@ -62,7 +55,7 @@ void Gather(const platform::Place& place, const paddle::framework::Tensor* src, int index_size = index->dims()[0]; auto src_dims = src->dims(); - DDim output_dims(src_dims); + paddle::framework::DDim output_dims(src_dims); output_dims[0] = index_size; // slice size @@ -73,13 +66,6 @@ void Gather(const platform::Place& place, const paddle::framework::Tensor* src, if (platform::is_cpu_place(place)) { CPUGather(src->data(), index->data(), slice_size, index_size, output->data()); - } else { - // init for GPU - // output_arr = output->mutable_data(output_dims, platform::GPUPlace()); - // how to specialize device?? - // GPUGather( - // d, src->data(), index->data(), slice_size, - // new_tensor->mutable_data()); } } diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc index 5d84b7b5f3..5de748ec46 100644 --- a/paddle/operators/gather_test.cc +++ b/paddle/operators/gather_test.cc @@ -29,7 +29,6 @@ TEST(Gather, GatherData) { Tensor* src = new Tensor(); Tensor* index = new Tensor(); Tensor* output = new Tensor(); - // src.Resize(make_ddim({3, 4})); int* p_src = nullptr; int* p_index = nullptr; @@ -40,7 +39,6 @@ TEST(Gather, GatherData) { p_index[0] = 1; p_index[1] = 0; - // gather int* p_output = output->mutable_data(make_ddim({2, 4}), CPUPlace()); Gather(CPUPlace(), src, index, output);