From 0446b488fd3db0b3e1b3c03a9a653a2843bdefca Mon Sep 17 00:00:00 2001
From: liaogang
Date: Thu, 16 Feb 2017 20:46:10 +0800
Subject: [PATCH 01/17] LayerOutput for single machine multiple devices

---
 .../gradientmachines/GradientMachine.h        |  2 +
 .../gradientmachines/MultiGradientMachine.cpp | 38 +++++++++++++++++++
 .../gradientmachines/MultiGradientMachine.h   |  2 +
 .../gradientmachines/NeuralNetwork.cpp        |  1 +
 .../gserver/gradientmachines/NeuralNetwork.h  |  3 +-
 5 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index 0829968d87..201b65bc45 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -134,6 +134,8 @@ public:
     backward(callback);
   }
 
+  virtual MatrixPtr getLayerOutput(const std::string& layerName) = 0;
+
   // see comment in Layer.h for the function with the same name
   virtual void resetState() {}
 
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 80f223824d..a571b3d72f 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -282,6 +282,44 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
   backwardImp(callback);
 }
 
+MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
+  // neural networks are same in each trainer thread
+  // layer output height = height of layer output * thread nums
+  auto nn = dynamic_cast<NeuralNetwork*>(threads_[0]->getGradientMachine());
+  auto height = nn->getLayerOutput(layerName)->getHeight() * threads_.size();
+  auto stream = HPPL_STREAM_DEFAULT;
+
+  auto copyLayerOutput = [height, stream](
+      MatrixPtr& dst, MatrixPtr src, int startRow, bool useGpu) {
+    size_t width = src->getWidth();
+    if (!dst) {
+      dst = src->clone(height, width, useGpu);
+    } else {
+      dst->resize(height, width);
+    }
+
+    MatrixPtr tmpMatrix = dst->subMatrix(startRow, src->getHeight());
+    tmpMatrix->copyFrom(*src, stream);
+  };
+
+  MatrixPtr mats;
+  size_t startRow = 0;
+
+  // copy one layer output from one trainer thread at each time
+  for (auto& thread : threads_) {
+    auto nn = dynamic_cast<NeuralNetwork*>(thread->getGradientMachine());
+    auto mat = nn->getLayerOutput(layerName);
+    copyLayerOutput(mats, mat, startRow, useGpu_);
+    startRow += mat->getHeight();
+  }
+
+  if (useGpu_) {
+    hl_stream_synchronize(HPPL_STREAM_DEFAULT);
+  }
+
+  return mats;
+}
+
 void MultiGradientMachine::backwardImp(const UpdateCallback& callback) {
   for (size_t i = 0; i < parameters_.size(); i++) {
     if (!parameters_[i]->useGpu() || parameters_[i]->isStatic()) continue;
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h
index 9be15ef4bc..988d509817 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.h
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h
@@ -189,6 +189,8 @@ public:
                        PassType passType,
                        const UpdateCallback& callback);
 
+  virtual MatrixPtr getLayerOutput(const std::string& layerName);
+
   virtual void onPassEnd();
 
   virtual void finish();
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 22051e07ee..1f9ace4f67 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -298,6 +298,7 @@ MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) {
   CHECK(it != layerMap_.end()) << "Cannot find layer: " << layerName;
   return it->second->getOutputValue();
 }
+
 void NeuralNetwork::onPassEnd() {
   for (auto& layer : layers_) {
     layer->onPassEnd();
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h
index 25af4abcf8..bf9ed09327 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.h
@@ -87,7 +87,8 @@ public:
 
   virtual void backward(const UpdateCallback& callback = nullptr);
 
-  MatrixPtr getLayerOutput(const std::string& layerName);
+  virtual MatrixPtr getLayerOutput(const std::string& layerName);
+
   const LayerPtr& getLayer(const std::string& layerName) const {
     auto it = layerMap_.find(layerName);
     CHECK(it != layerMap_.end()) << "Unknown layer " << layerName;

From 7c5fd231063908e1d7699c995d1acebb2d321aa9 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Fri, 17 Feb 2017 13:08:39 +0800
Subject: [PATCH 02/17] Update MultiGradientMachine::getLayerOutput

---
 .../gradientmachines/MultiGradientMachine.cpp | 41 ++++++++-----------
 paddle/gserver/layers/CosSimLayer.cpp         |  2 +-
 paddle/math/tests/test_RowBuffer.cpp          |  8 ++--
 3 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index a571b3d72f..56b1836e41 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -283,41 +283,34 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
 }
 
 MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
-  // neural networks are same in each trainer thread
-  // layer output height = height of layer output * thread nums
-  auto nn = dynamic_cast<NeuralNetwork*>(threads_[0]->getGradientMachine());
-  auto height = nn->getLayerOutput(layerName)->getHeight() * threads_.size();
-  auto stream = HPPL_STREAM_DEFAULT;
-
-  auto copyLayerOutput = [height, stream](
-      MatrixPtr& dst, MatrixPtr src, int startRow, bool useGpu) {
-    size_t width = src->getWidth();
-    if (!dst) {
-      dst = src->clone(height, width, useGpu);
-    } else {
-      dst->resize(height, width);
-    }
+  // each thread has the same neuro network
+  auto nn = threads_[0]->getGradientMachine();
 
-    MatrixPtr tmpMatrix = dst->subMatrix(startRow, src->getHeight());
-    tmpMatrix->copyFrom(*src, stream);
-  };
+  size_t height = 0;
+  size_t width = nn->getLayerOutput(layerName)->getWidth();
+  for (auto& thread : threads_) {
+    auto out = thread->getGradientMachine()->getLayerOutput(layerName);
+    height += out->getHeight();
+    CHECK_EQ(width, out->getWidth());
+  }
 
-  MatrixPtr mats;
-  size_t startRow = 0;
+  MatrixPtr dst;
+  Matrix::resizeOrCreate(dst, height, width, false, useGpu_);
 
   // copy one layer output from one trainer thread at each time
+  size_t startRow = 0;
   for (auto& thread : threads_) {
-    auto nn = dynamic_cast<NeuralNetwork*>(thread->getGradientMachine());
-    auto mat = nn->getLayerOutput(layerName);
-    copyLayerOutput(mats, mat, startRow, useGpu_);
-    startRow += mat->getHeight();
+    auto src = thread->getGradientMachine()->getLayerOutput(layerName);
+    auto tmpMatrix = dst->subMatrix(startRow, src->getHeight());
+    tmpMatrix->copyFrom(*src, HPPL_STREAM_DEFAULT);
+    startRow += src->getHeight();
   }
 
   if (useGpu_) {
     hl_stream_synchronize(HPPL_STREAM_DEFAULT);
   }
 
-  return mats;
+  return dst;
 }
 
 void MultiGradientMachine::backwardImp(const UpdateCallback& callback) {
diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/gserver/layers/CosSimLayer.cpp
index a6c0300acf..1501c74370 100644
--- a/paddle/gserver/layers/CosSimLayer.cpp
+++ b/paddle/gserver/layers/CosSimLayer.cpp
@@ -42,7 +42,7 @@ void CosSimLayer::forward(PassType passType) {
   /* malloc memory for the output_ if necessary */
   int batchSize = getInputValue(0)->getHeight();
   int size = getSize();
-  CHECK_EQ(forward_.size(), 1) << "Only one forward function needed";
+  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";
 
   {
     REGISTER_TIMER_INFO("CosFwResetTimer", getName().c_str());
diff --git a/paddle/math/tests/test_RowBuffer.cpp b/paddle/math/tests/test_RowBuffer.cpp
index 5f66f22ef7..8cc4c69a1a 100644
--- a/paddle/math/tests/test_RowBuffer.cpp
+++ b/paddle/math/tests/test_RowBuffer.cpp
@@ -17,10 +17,10 @@ limitations under the License. */
 
 TEST(RowBuffer, testAutoGrow) {
   paddle::RowBuffer buf(128);
-  ASSERT_EQ(128, buf.getWidth());
+  ASSERT_EQ(128UL, buf.getWidth());
   ASSERT_TRUE(buf.isAutoGrowth());
   buf.resize(2);
-  ASSERT_EQ(2, buf.getRowCount());
+  ASSERT_EQ(2UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getWidth() * 2; ++i) {
     buf.data()[i] = i;
   }
@@ -35,7 +35,7 @@ TEST(RowBuffer, testAutoGrow) {
     data[i] = i;
   }
 
-  ASSERT_EQ(3, buf.getRowCount());
+  ASSERT_EQ(3UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getRowCount() - 1; ++i) {
     for (size_t j = 0; j < buf.getWidth(); ++j) {
       ASSERT_NEAR(i * buf.getWidth() + j, buf.get(i)[j], 1e-5);
@@ -51,7 +51,7 @@ TEST(RowBuffer, testWithMemBuf) {
       std::make_shared<paddle::CpuMemoryHandle>(128 * 2 * sizeof(real));
   paddle::RowBuffer buf(mem, 128);
   ASSERT_TRUE(!buf.isAutoGrowth());
-  ASSERT_EQ(2, buf.getRowCount());
+  ASSERT_EQ(2UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getWidth() * 2; ++i) {
     buf.data()[i] = i;
   }

From 258e5ec59f28f617397646edbf67d4a576f0d3f0 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Fri, 17 Feb 2017 13:23:38 +0800
Subject: [PATCH 03/17] update GradientMachine API

---
 paddle/api/GradientMachine.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp
index 66115f8293..a44763bfa5 100644
--- a/paddle/api/GradientMachine.cpp
+++ b/paddle/api/GradientMachine.cpp
@@ -146,7 +146,7 @@ void GradientMachine::randParameters() { m->machine->randParameters(); }
 
 Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
     throw(UnsupportError) {
-  auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(m->machine);
+  auto nn = m->machine;
   if (nn) {
     auto mat = nn->getLayerOutput(layerName);
     return Matrix::createByPaddleMatrixPtr(&mat);

From 3842bc4d7c904b2d0bda4aa48429a20c317f1420 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Fri, 17 Feb 2017 13:42:33 +0800
Subject: [PATCH 04/17] refine code

---
 .../gradientmachines/MultiGradientMachine.cpp | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 56b1836e41..db13a88688 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -285,32 +285,34 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
 MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
   // each thread has the same neuro network
   auto nn = threads_[0]->getGradientMachine();
-
   size_t height = 0;
   size_t width = nn->getLayerOutput(layerName)->getWidth();
+  std::vector<MatrixPtr> mats;
+  mats.reserve(threads_.size());
   for (auto& thread : threads_) {
-    auto out = thread->getGradientMachine()->getLayerOutput(layerName);
+    MatrixPtr out = thread->getGradientMachine()->getLayerOutput(layerName);
+    mats.push_back(out);
     height += out->getHeight();
     CHECK_EQ(width, out->getWidth());
   }
 
-  MatrixPtr dst;
-  Matrix::resizeOrCreate(dst, height, width, false, useGpu_);
+  MatrixPtr layerOutput;
+  Matrix::resizeOrCreate(layerOutput, height, width, false, useGpu_);
 
   // copy one layer output from one trainer thread at each time
   size_t startRow = 0;
-  for (auto& thread : threads_) {
-    auto src = thread->getGradientMachine()->getLayerOutput(layerName);
-    auto tmpMatrix = dst->subMatrix(startRow, src->getHeight());
-    tmpMatrix->copyFrom(*src, HPPL_STREAM_DEFAULT);
-    startRow += src->getHeight();
+
+  for (size_t i = 0; i < threads_.size(); i++) {
+    auto tmpMatrix = layerOutput->subMatrix(startRow, mats[i]->getHeight());
+    tmpMatrix->copyFrom(*mats[i], HPPL_STREAM_DEFAULT);
+    startRow += mats[i]->getHeight();
   }
 
   if (useGpu_) {
     hl_stream_synchronize(HPPL_STREAM_DEFAULT);
   }
 
-  return dst;
+  return layerOutput;
 }

From 84552872a337b42252233023191698f992aa5808 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Fri, 17 Feb 2017 16:09:50 +0800
Subject: [PATCH 05/17] getLayerOutput in CPU

---
 paddle/gserver/gradientmachines/GradientMachine.h      |  4 +++-
 .../gradientmachines/MultiGradientMachine.cpp          | 10 +++-------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index 201b65bc45..a814e771d1 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -134,7 +134,9 @@ public:
     backward(callback);
   }
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName) = 0;
+  virtual MatrixPtr getLayerOutput(const std::string& layerName) {
+    return nullptr;
+  }
 
   // see comment in Layer.h for the function with the same name
   virtual void resetState() {}
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index db13a88688..7e60920376 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -283,7 +283,7 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
 }
 
 MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
-  // each thread has the same neuro network
+  // each thread has the same neural network
   auto nn = threads_[0]->getGradientMachine();
   size_t height = 0;
   size_t width = nn->getLayerOutput(layerName)->getWidth();
@@ -297,21 +297,17 @@ MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
   }
 
   MatrixPtr layerOutput;
-  Matrix::resizeOrCreate(layerOutput, height, width, false, useGpu_);
+  Matrix::resizeOrCreate(layerOutput, height, width, false, false);
 
   // copy one layer output from one trainer thread at each time
   size_t startRow = 0;
 
   for (size_t i = 0; i < threads_.size(); i++) {
     auto tmpMatrix = layerOutput->subMatrix(startRow, mats[i]->getHeight());
-    tmpMatrix->copyFrom(*mats[i], HPPL_STREAM_DEFAULT);
+    tmpMatrix->copyFrom(*mats[i]);
     startRow += mats[i]->getHeight();
   }
 
-  if (useGpu_) {
-    hl_stream_synchronize(HPPL_STREAM_DEFAULT);
-  }
-
   return layerOutput;
 }

From f846e8fec2e9c8d16e24e12d6b6b7efc77d3cc83 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Mon, 20 Feb 2017 15:13:43 +0800
Subject: [PATCH 06/17] Add const for getLayerOutput

---
 paddle/gserver/gradientmachines/GradientMachine.h      |  2 +-
 .../gradientmachines/MultiGradientMachine.cpp          | 12 ++++++------
 .../gserver/gradientmachines/MultiGradientMachine.h    |  2 +-
 paddle/gserver/gradientmachines/NeuralNetwork.cpp      |  2 +-
 paddle/gserver/gradientmachines/NeuralNetwork.h        |  2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index a814e771d1..5469c0d89f 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -134,7 +134,7 @@ public:
     backward(callback);
   }
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName) {
+  virtual MatrixPtr getLayerOutput(const std::string& layerName) const {
     return nullptr;
   }
 
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 7e60920376..2d42e64830 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -282,7 +282,8 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
   backwardImp(callback);
 }
 
-MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
+MatrixPtr MultiGradientMachine::getLayerOutput(
+    const std::string& layerName) const {
   // each thread has the same neural network
   auto nn = threads_[0]->getGradientMachine();
   size_t height = 0;
@@ -301,11 +302,10 @@ MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
 
   // copy one layer output from one trainer thread at each time
   size_t startRow = 0;
-
-  for (size_t i = 0; i < threads_.size(); i++) {
-    auto tmpMatrix = layerOutput->subMatrix(startRow, mats[i]->getHeight());
-    tmpMatrix->copyFrom(*mats[i]);
-    startRow += mats[i]->getHeight();
+  for (auto& mat : mats) {
+    auto tmpMatrix = layerOutput->subMatrix(startRow, mat->getHeight());
+    tmpMatrix->copyFrom(*mat);
+    startRow += mat->getHeight();
   }
 
   return layerOutput;
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h
index 988d509817..a1a2d41706 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.h
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h
@@ -189,7 +189,7 @@ public:
                        PassType passType,
                        const UpdateCallback& callback);
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName);
+  virtual MatrixPtr getLayerOutput(const std::string& layerName) const;
 
   virtual void onPassEnd();
 
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 1f9ace4f67..00887c81d4 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -293,7 +293,7 @@ void NeuralNetwork::backward(const UpdateCallback& callback) {
   }
 }
 
-MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) {
+MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) const {
   auto it = layerMap_.find(layerName);
   CHECK(it != layerMap_.end()) << "Cannot find layer: " << layerName;
   return it->second->getOutputValue();
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h
index bf9ed09327..6ecc251a40 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.h
@@ -87,7 +87,7 @@ public:
 
   virtual void backward(const UpdateCallback& callback = nullptr);
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName);
+  virtual MatrixPtr getLayerOutput(const std::string& layerName) const;
 
   const LayerPtr& getLayer(const std::string& layerName) const {
     auto it = layerMap_.find(layerName);

From fbf864362dc1bd716a8db1f4441afe488fe3d74b Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 22 Feb 2017 16:02:58 +0800
Subject: [PATCH 07/17] Update python getLayerOutputs

---
 demo/image_classification/prediction.py       |  2 +-
 demo/model_zoo/resnet/classify.py             |  2 +-
 paddle/api/Arguments.cpp                      |  7 ++++
 paddle/api/GradientMachine.cpp                |  5 ++-
 paddle/api/PaddleAPI.h                        |  5 +--
 paddle/api/Trainer.cpp                        |  8 ++---
 .../gradientmachines/GradientMachine.h        |  4 +--
 .../gradientmachines/MultiGradientMachine.cpp | 32 +++++--------------
 .../gradientmachines/MultiGradientMachine.h   |  4 ++-
 .../gradientmachines/NeuralNetwork.cpp        |  6 ++--
 .../gserver/gradientmachines/NeuralNetwork.h  |  2 +-
 paddle/gserver/layers/CosSimLayer.cpp         |  2 +-
 paddle/py_paddle/util.py                      |  2 +-
 13 files changed, 35 insertions(+), 46 deletions(-)

diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py
index 9a86aafcb2..49c0ff600c 100755
--- a/demo/image_classification/prediction.py
+++ b/demo/image_classification/prediction.py
@@ -126,7 +126,7 @@ class ImageClassifier():
         # For oversampling, average predictions across crops.
         # If not, the shape of output[name]: (1, class_number),
         # the mean is also applicable.
-        return output[output_layer].mean(0)
+        return output[output_layer]['value'].mean(0)
 
     def predict(self, image=None, output_layer=None):
         assert isinstance(image, basestring)
diff --git a/demo/model_zoo/resnet/classify.py b/demo/model_zoo/resnet/classify.py
index 4631816c43..6074cc1d3a 100755
--- a/demo/model_zoo/resnet/classify.py
+++ b/demo/model_zoo/resnet/classify.py
@@ -156,7 +156,7 @@ class ImageClassifier():
             # For oversampling, average predictions across crops.
            # If not, the shape of output[name]: (1, class_number),
             # the mean is also applicable.
-            res[name] = output[name].mean(0)
+            res[name] = output[name]['value'].mean(0)
 
         return res
 
diff --git a/paddle/api/Arguments.cpp b/paddle/api/Arguments.cpp
index 41beed38a8..a3f4bfffc9 100644
--- a/paddle/api/Arguments.cpp
+++ b/paddle/api/Arguments.cpp
@@ -38,6 +38,13 @@ Arguments* Arguments::createByPaddleArgumentVector(void* ptr) {
   return args;
 }
 
+Arguments* Arguments::createByPaddleArgument(const void* ptr) {
+  auto p = (paddle::Argument*)(ptr);
+  auto args = new Arguments();
+  args->m->outputs.push_back(*p);
+  return args;
+}
+
 Matrix* Arguments::getSlotValue(size_t idx) const throw(RangeError) {
   auto& a = m->getArg(idx);
   return Matrix::createByPaddleMatrixPtr(&a.value);
diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp
index a44763bfa5..a64e70a6bd 100644
--- a/paddle/api/GradientMachine.cpp
+++ b/paddle/api/GradientMachine.cpp
@@ -144,12 +144,11 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) {
 
 void GradientMachine::randParameters() { m->machine->randParameters(); }
 
-Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
+Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const
     throw(UnsupportError) {
   auto nn = m->machine;
   if (nn) {
-    auto mat = nn->getLayerOutput(layerName);
-    return Matrix::createByPaddleMatrixPtr(&mat);
+    return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName));
   } else {
     throw UnsupportError();
   }
diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h
index f5af8b0035..10569a7170 100644
--- a/paddle/api/PaddleAPI.h
+++ b/paddle/api/PaddleAPI.h
@@ -454,6 +454,7 @@ public:
 
 private:
   static Arguments* createByPaddleArgumentVector(void* ptr);
+  static Arguments* createByPaddleArgument(const void* ptr);
   void* getInternalArgumentsPtr() const;
 
 private:
@@ -769,7 +770,7 @@ public:
 
   void randParameters();
 
-  Matrix* getLayerOutput(const std::string& layerName) const
+  Arguments* getLayerOutput(const std::string& layerName) const
       throw(UnsupportError);
 
   /**
@@ -952,7 +953,7 @@ public:
 
   Arguments* getForwardOutput();
 
-  Matrix* getLayerOutput(const std::string& layerName);
+  Arguments* getLayerOutput(const std::string& layerName);
 };
 
 /// the N-Best results generated from one input sequence.
diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp
index d83dc380be..c742614aff 100644
--- a/paddle/api/Trainer.cpp
+++ b/paddle/api/Trainer.cpp
@@ -131,12 +131,10 @@ void Trainer::testOneDataBatch(size_t batchSize, const Arguments& args) {
 void TrainerPrivate::finishTestPeriod() { tester_->finishTestPeriod(); }
 void Trainer::finishTestPeriod() { m->finishTestPeriod(); }
 
-Matrix* Trainer::getLayerOutput(const std::string& layerName) {
-  auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(
-      this->m->getGradientMachine());
+Arguments* Trainer::getLayerOutput(const std::string& layerName) {
+  auto nn = this->m->getGradientMachine();
   CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork";
-  auto m = nn->getLayerOutput(layerName);
-  return Matrix::createByPaddleMatrixPtr(&m);
+  return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName));
 }
 
 void Trainer::forwardOneBatch(size_t batchSize) {
diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index 5469c0d89f..ae39783c6b 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -134,8 +134,8 @@ public:
     backward(callback);
   }
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName) const {
-    return nullptr;
+  virtual const Argument& getLayerOutput(const std::string& layerName) {
+    return *((Argument*)nullptr);
   }
 
   // see comment in Layer.h for the function with the same name
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 2d42e64830..6b11b0155e 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -282,33 +282,17 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
   backwardImp(callback);
 }
 
-MatrixPtr MultiGradientMachine::getLayerOutput(
-    const std::string& layerName) const {
-  // each thread has the same neural network
-  auto nn = threads_[0]->getGradientMachine();
-  size_t height = 0;
-  size_t width = nn->getLayerOutput(layerName)->getWidth();
-  std::vector<MatrixPtr> mats;
-  mats.reserve(threads_.size());
-  for (auto& thread : threads_) {
-    MatrixPtr out = thread->getGradientMachine()->getLayerOutput(layerName);
-    mats.push_back(out);
-    height += out->getHeight();
-    CHECK_EQ(width, out->getWidth());
-  }
+const Argument& MultiGradientMachine::getLayerOutput(
+    const std::string& layerName) {
+  std::vector<Argument> args;
+  args.reserve(threads_.size());
 
-  MatrixPtr layerOutput;
-  Matrix::resizeOrCreate(layerOutput, height, width, false, false);
-
-  // copy one layer output from one trainer thread at each time
-  size_t startRow = 0;
-  for (auto& mat : mats) {
-    auto tmpMatrix = layerOutput->subMatrix(startRow, mat->getHeight());
-    tmpMatrix->copyFrom(*mat);
-    startRow += mat->getHeight();
+  for (auto& thread : threads_) {
+    args.push_back(thread->getGradientMachine()->getLayerOutput(layerName));
   }
+  outLayerArgs_.concat(args, false /* use_gpu */, outArgStream_, passType_);
 
-  return layerOutput;
+  return outLayerArgs_;
 }
 
 void MultiGradientMachine::backwardImp(const UpdateCallback& callback) {
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h
index a1a2d41706..9083230afd 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.h
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h
@@ -189,7 +189,7 @@ public:
                        PassType passType,
                        const UpdateCallback& callback);
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName) const;
+  virtual const Argument& getLayerOutput(const std::string& layerName);
 
   virtual void onPassEnd();
 
@@ -316,6 +316,8 @@ protected:
   std::vector<Argument> outArgs_;
   hl_stream_t outArgStream_;
 
+  Argument outLayerArgs_;
+
   /// ParameterType which needs to be merged from each GPU
   std::vector<ParameterType> mergeTypes_;
   int numDevices_; /* number of gpu devices */
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 00887c81d4..d1afde40e1 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -293,10 +293,8 @@ void NeuralNetwork::backward(const UpdateCallback& callback) {
   }
 }
 
-MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) const {
-  auto it = layerMap_.find(layerName);
-  CHECK(it != layerMap_.end()) << "Cannot find layer: " << layerName;
-  return it->second->getOutputValue();
+const Argument& NeuralNetwork::getLayerOutput(const std::string& layerName) {
+  return getLayer(layerName)->getOutput();
 }
 
 void NeuralNetwork::onPassEnd() {
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h
index 6ecc251a40..b4dc38e31b 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.h
@@ -87,7 +87,7 @@ public:
 
   virtual void backward(const UpdateCallback& callback = nullptr);
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName) const;
+  virtual const Argument& getLayerOutput(const std::string& layerName);
 
   const LayerPtr& getLayer(const std::string& layerName) const {
     auto it = layerMap_.find(layerName);
diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/gserver/layers/CosSimLayer.cpp
index 1501c74370..57ba124e40 100644
--- a/paddle/gserver/layers/CosSimLayer.cpp
+++ b/paddle/gserver/layers/CosSimLayer.cpp
@@ -68,7 +68,7 @@ void CosSimLayer::forward(PassType passType) {
 void CosSimLayer::backward(const UpdateCallback& callback) {
   /* activation */ {
     REGISTER_TIMER_INFO("CosBpAtvTimer", getName().c_str());
-    CHECK_EQ(backward_.size(), 1) << "Only one backward function needed";
+    CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed";
 
     const auto outG = this->getOutputGrad();
     const auto outV = this->getOutputValue();
diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py
index ce105d249a..a708def1d2 100644
--- a/paddle/py_paddle/util.py
+++ b/paddle/py_paddle/util.py
@@ -208,7 +208,7 @@ def __monkeypatch_gradient_machine__():
 
         output = dict()
         for name in layerNames:
-            output[name] = __matrix_to_numpy__(self.getLayerOutput(name))
+            output[name] = __arguments_to_numpy__(0, self.getLayerOutput(name))
         return output
 
     swig_paddle.GradientMachine.getLayerOutputs = getLayerOutputs

From bbfcee20fd8733c2e9ea5ec49e1963d62e777730 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 22 Feb 2017 17:42:08 +0800
Subject: [PATCH 08/17] Add const for Trainer::getLayerOutput

---
 paddle/api/PaddleAPI.h | 2 +-
 paddle/api/Trainer.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h
index 10569a7170..59798796a2 100644
--- a/paddle/api/PaddleAPI.h
+++ b/paddle/api/PaddleAPI.h
@@ -953,7 +953,7 @@ public:
 
   Arguments* getForwardOutput();
 
-  Arguments* getLayerOutput(const std::string& layerName);
+  Arguments* getLayerOutput(const std::string& layerName) const;
 };
 
 /// the N-Best results generated from one input sequence.
diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp
index c742614aff..29cf2aa450 100644
--- a/paddle/api/Trainer.cpp
+++ b/paddle/api/Trainer.cpp
@@ -131,7 +131,7 @@ void Trainer::testOneDataBatch(size_t batchSize, const Arguments& args) {
 void TrainerPrivate::finishTestPeriod() { tester_->finishTestPeriod(); }
 void Trainer::finishTestPeriod() { m->finishTestPeriod(); }
 
-Arguments* Trainer::getLayerOutput(const std::string& layerName) {
+Arguments* Trainer::getLayerOutput(const std::string& layerName) const {
   auto nn = this->m->getGradientMachine();
   CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork";
   return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName));

From d0dcb3ea34ada48901666283929e35587d1c5c78 Mon Sep 17 00:00:00 2001
From: Helin Wang
Date: Wed, 22 Feb 2017 15:34:36 -0800
Subject: [PATCH 09/17] create numpy array reader creator and text reader
 creator

---
 python/paddle/reader/__init__.py              |  2 +
 python/paddle/reader/creator.py               | 52 +++++++++++++++++++
 python/paddle/reader/tests/CMakeLists.txt     |  5 ++
 python/paddle/reader/tests/creator_test.py    | 38 ++++++++++++++
 .../paddle/reader/tests/test_data_creator.txt |  3 ++
 5 files changed, 100 insertions(+)
 create mode 100644 python/paddle/reader/creator.py
 create mode 100644 python/paddle/reader/tests/creator_test.py
 create mode 100644 python/paddle/reader/tests/test_data_creator.txt

diff --git a/python/paddle/reader/__init__.py b/python/paddle/reader/__init__.py
index 493b410e82..7373dc461b 100644
--- a/python/paddle/reader/__init__.py
+++ b/python/paddle/reader/__init__.py
@@ -21,3 +21,5 @@
 #
 # r = paddle.reader.buffered(paddle.reader.creator.text("hello.txt"))
 from decorator import *
+
+import creator
diff --git a/python/paddle/reader/creator.py b/python/paddle/reader/creator.py
new file mode 100644
index 0000000000..5c840f94b5
--- /dev/null
+++ b/python/paddle/reader/creator.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ['np_array', 'text_file']
+
+
+def np_array(x):
+    """
+    Creates a data reader from numpy array.
+    The highest dimension will be treated as batch dimension to iterate on.
+
+    :param x: the numpy array to create reader from.
+    :returns: data reader created from x.
+    """
+
+    def reader():
+        if x.ndim < 1:
+            yield x
+
+        for e in x:
+            yield e
+
+    return reader
+
+
+def text_file(path):
+    """
+    Creates a data reader that outputs text line by line from given text file.
+    Trailing new line ('\n') of each line will be removed.
+
+    :path: path of the text file.
+    :returns: data reader of text file
+    """
+
+    def reader():
+        f = open(path, "r")
+        for l in f:
+            yield l.rstrip('\n')
+        f.close()
+
+    return reader
diff --git a/python/paddle/reader/tests/CMakeLists.txt b/python/paddle/reader/tests/CMakeLists.txt
index 502c897d89..da072fb3db 100644
--- a/python/paddle/reader/tests/CMakeLists.txt
+++ b/python/paddle/reader/tests/CMakeLists.txt
@@ -2,3 +2,8 @@ add_test(NAME reader_decorator_test
   COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
     ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/reader/tests/decorator_test.py
   WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
+
+add_test(NAME reader_creator_test
+  COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
+    ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/reader/tests/creator_test.py
+  WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
diff --git a/python/paddle/reader/tests/creator_test.py b/python/paddle/reader/tests/creator_test.py
new file mode 100644
index 0000000000..eda8ab6715
--- /dev/null
+++ b/python/paddle/reader/tests/creator_test.py
@@ -0,0 +1,38 @@
+# Copyright PaddlePaddle contributors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+import paddle.reader.creator
+import numpy as np
+import os
+
+
+class TestNumpyArray(unittest.TestCase):
+    def test_numpy_array(self):
+        l = [[1, 2, 3], [4, 5, 6]]
+        x = np.array(l, np.int32)
+        reader = paddle.reader.creator.np_array(x)
+        for idx, e in enumerate(reader()):
+            self.assertItemsEqual(e, l[idx])
+
+
+class TestTextFile(unittest.TestCase):
+    def test_text_file(self):
+        path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt")
+        reader = paddle.reader.creator.text_file(path)
+        for idx, e in enumerate(reader()):
+            self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/reader/tests/test_data_creator.txt b/python/paddle/reader/tests/test_data_creator.txt
new file mode 100644
index 0000000000..a2a8d47d43
--- /dev/null
+++ b/python/paddle/reader/tests/test_data_creator.txt
@@ -0,0 +1,3 @@
+0 1
+2 3
+4 5

From 51de2ded3ecf674bd5f96a9f3129d6630bfb65a1 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Thu, 23 Feb 2017 16:59:38 +0800
Subject: [PATCH 10/17] add optimizer in v2

---
 python/paddle/v2/optimizer.py | 55 +++++++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py
index aa2942bc9f..10e255dc94 100644
--- a/python/paddle/v2/optimizer.py
+++ b/python/paddle/v2/optimizer.py
@@ -3,7 +3,10 @@ import paddle.trainer_config_helpers.optimizers as v1_optimizers
 import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
 import paddle.v2
 
-__all__ = ['Adam', 'Adamax']
+__all__ = [
+    'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta',
+    'RMSProp', 'ModelAverage', 'L2Regularization'
+]
 
 
 class Optimizer(object):
@@ -38,6 +41,14 @@ class Optimizer(object):
             pass_num)
 
 
+class Momentum(Optimizer):
+    def __init__(self, momentum=None, sparse=False, **kwargs):
+        learning_method = v1_optimizers.MomentumOptimizer(
+            momentum=None, sparse=False)
+        super(Momentum, self).__init__(
+            learning_method=learning_method, **kwargs)
+
+
 class Adam(Optimizer):
     def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs):
         learning_method = v1_optimizers.AdamOptimizer(
@@ -52,7 +63,45 @@ class Adamax(Optimizer):
         super(Adamax, self).__init__(learning_method=learning_method, **kwargs)
 
 
+class AdaGrad(Optimizer):
+    def __init__(self, **kwargs):
+        learning_method = v1_optimizers.AdaGradOptimizer()
+        super(AdaGrad, self).__init__(learning_method=learning_method, **kwargs)
+
+
+class DecayedAdaGrad(Optimizer):
+    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
+        learning_method = v1_optimizers.DecayedAdaGradOptimizer(
+            rho=rho, epsilon=epsilon)
+        super(DecayedAdaGrad, self).__init__(
+            learning_method=learning_method, **kwargs)
+
+
+class AdaDelta(Optimizer):
+    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
+        learning_method = v1_optimizers.AdaDeltaOptimizer(
+            rho=rho, epsilon=epsilon)
+        super(AdaDelta, self).__init__(
+            learning_method=learning_method, **kwargs)
+
+
+class RMSProp(Optimizer):
+    def __init__(self, rho=0.95, epsilon=1e-6, **kwargs):
+        learning_method = v1_optimizers.RMSPropOptimizer(
+            rho=rho, epsilon=epsilon)
+        super(RMSProp, self).__init__(learning_method=learning_method, **kwargs)
+
+
+ModelAverage = v1_optimizers.ModelAverage
+L2Regularization = v1_optimizers.L2Regularization
+
 if __name__ == '__main__':
     swig_api.initPaddle('--use_gpu=false')
-    opt = paddle.v2.optimizer.Adam()
-    print opt.enable_types()
+    for opt in [
+            Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(),
+            AdaDelta(), RMSProp(), Adam(
+                model_average=ModelAverage(average_window=0.5),
+                regularization=L2Regularization(rate=0.5),
+                gradient_clipping_threshold=25)
+    ]:
+        print opt, opt.enable_types()

From 950b4a312697fe8b5132437d85b919353af15365 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Thu, 23 Feb 2017 17:24:01 +0800
Subject: [PATCH 11/17] change return type Argument

---
 paddle/api/GradientMachine.cpp                           | 3 ++-
 paddle/api/Trainer.cpp                                   | 3 ++-
 paddle/gserver/gradientmachines/GradientMachine.h        | 2 +-
 paddle/gserver/gradientmachines/MultiGradientMachine.cpp | 3 +--
 paddle/gserver/gradientmachines/MultiGradientMachine.h   | 2 +-
 paddle/gserver/gradientmachines/NeuralNetwork.cpp        | 2 +-
 paddle/gserver/gradientmachines/NeuralNetwork.h          | 2 +-
 paddle/gserver/layers/CosSimVecMatLayer.cpp              | 4 ++--
 8 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp
index a64e70a6bd..538ca2999f 100644
--- a/paddle/api/GradientMachine.cpp
+++ b/paddle/api/GradientMachine.cpp
@@ -148,7 +148,8 @@ Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const
     throw(UnsupportError) {
   auto nn = m->machine;
   if (nn) {
-    return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName));
+    auto arg = nn->getLayerOutput(layerName);
+    return Arguments::createByPaddleArgument(&arg);
   } else {
     throw UnsupportError();
   }
diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp
index 29cf2aa450..84e4ca054a 100644
--- a/paddle/api/Trainer.cpp
+++ b/paddle/api/Trainer.cpp
@@ -134,7 +134,8 @@ void Trainer::finishTestPeriod() { m->finishTestPeriod(); }
 Arguments* Trainer::getLayerOutput(const std::string& layerName) const {
   auto nn = this->m->getGradientMachine();
   CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork";
NeuralNetwork"; - return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName)); + auto arg = nn->getLayerOutput(layerName); + return Arguments::createByPaddleArgument(&arg); } void Trainer::forwardOneBatch(size_t batchSize) { diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h index ae39783c6b..bc2f2f8563 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.h +++ b/paddle/gserver/gradientmachines/GradientMachine.h @@ -134,7 +134,7 @@ public: backward(callback); } - virtual const Argument& getLayerOutput(const std::string& layerName) { + virtual Argument getLayerOutput(const std::string& layerName) { return *((Argument*)nullptr); } diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp index 6b11b0155e..123273f916 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp @@ -282,8 +282,7 @@ void MultiGradientMachine::forwardBackward(const std::vector& inArgs, backwardImp(callback); } -const Argument& MultiGradientMachine::getLayerOutput( - const std::string& layerName) { +Argument MultiGradientMachine::getLayerOutput(const std::string& layerName) { std::vector args; args.reserve(threads_.size()); diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h index 9083230afd..838a52b515 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.h +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h @@ -189,7 +189,7 @@ public: PassType passType, const UpdateCallback& callback); - virtual const Argument& getLayerOutput(const std::string& layerName); + virtual Argument getLayerOutput(const std::string& layerName); virtual void onPassEnd(); diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index d1afde40e1..2f2aa24aac 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -293,7 +293,7 @@ void NeuralNetwork::backward(const UpdateCallback& callback) { } } -const Argument& NeuralNetwork::getLayerOutput(const std::string& layerName) { +Argument NeuralNetwork::getLayerOutput(const std::string& layerName) { return getLayer(layerName)->getOutput(); } diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h index b4dc38e31b..e7b6c43840 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.h +++ b/paddle/gserver/gradientmachines/NeuralNetwork.h @@ -87,7 +87,7 @@ public: virtual void backward(const UpdateCallback& callback = nullptr); - virtual const Argument& getLayerOutput(const std::string& layerName); + virtual Argument getLayerOutput(const std::string& layerName); const LayerPtr& getLayer(const std::string& layerName) const { auto it = layerMap_.find(layerName); diff --git a/paddle/gserver/layers/CosSimVecMatLayer.cpp b/paddle/gserver/layers/CosSimVecMatLayer.cpp index aabafd473a..0f887d8adf 100644 --- a/paddle/gserver/layers/CosSimVecMatLayer.cpp +++ b/paddle/gserver/layers/CosSimVecMatLayer.cpp @@ -112,7 +112,7 @@ bool CosSimVecMatLayer::init(const LayerMap& layerMap, void CosSimVecMatLayer::forward(PassType passType) { Layer::forward(passType); - CHECK_EQ(forward_.size(), 1) << "Only one forward function needed"; + CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed"; MatrixPtr inV0 = 
   MatrixPtr inV1 = getInputValue(1);
 
@@ -145,7 +145,7 @@ void CosSimVecMatLayer::forward(PassType passType) {
 }
 
 void CosSimVecMatLayer::backward(const UpdateCallback& callback) {
-  CHECK_EQ(backward_.size(), 1) << "Only one forward function needed";
+  CHECK_EQ(backward_.size(), 1UL) << "Only one forward function needed";
 
   MatrixPtr inV0 = getInputValue(0);
   MatrixPtr inV1 = getInputValue(1);

From d1ab3c80744a851164dd4dc76a847193eb4c5562 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 23 Feb 2017 19:20:33 +0800
Subject: [PATCH 12/17] MNIST dataset reader implementation

---
 python/paddle/v2/data_set/__init__.py |  0
 python/paddle/v2/data_set/mnist.py    | 62 +++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 python/paddle/v2/data_set/__init__.py
 create mode 100644 python/paddle/v2/data_set/mnist.py

diff --git a/python/paddle/v2/data_set/__init__.py b/python/paddle/v2/data_set/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/paddle/v2/data_set/mnist.py b/python/paddle/v2/data_set/mnist.py
new file mode 100644
index 0000000000..34f61bb9f6
--- /dev/null
+++ b/python/paddle/v2/data_set/mnist.py
@@ -0,0 +1,62 @@
+import sklearn.datasets.mldata
+import sklearn.model_selection
+import numpy
+
+__all__ = ['MNISTReader', 'train_reader_creator', 'test_reader_creator']
+
+DATA_HOME = None
+
+
+def __mnist_reader__(data, target):
+    n_samples = data.shape[0]
+    for i in xrange(n_samples):
+        yield data[i].astype(numpy.float32), int(target[i])
+
+
+class MNISTReader(object):
+    """
+    mnist dataset reader. The `train_reader` and `test_reader` method returns
+    a iterator of each sample. Each sample is combined by 784-dim float and a
+    one-dim label
+    """
+
+    def __init__(self, random_state):
+        data = sklearn.datasets.mldata.fetch_mldata(
+            "MNIST original", data_home=DATA_HOME)
+        n_train = 60000
+        self.X_train, self.X_test, self.y_train, self.y_test = sklearn.model_selection.train_test_split(
+            data.data / 255.0,
+            data.target.astype("int"),
+            train_size=n_train,
+            random_state=random_state)
+
+    def train_reader(self):
+        return __mnist_reader__(self.X_train, self.y_train)
+
+    def test_reader(self):
+        return __mnist_reader__(self.X_test, self.y_test)
+
+
+__default_instance__ = MNISTReader(0)
+
+
+def train_reader_creator():
+    """
+    Default train set reader creator.
+    """
+    return __default_instance__.train_reader
+
+
+def test_reader_creator():
+    """
+    Default test set reader creator.
+    """
+    return __default_instance__.test_reader
+
+
+def unittest():
+    assert len(list(train_reader_creator()())) == 60000
+
+
+if __name__ == '__main__':
+    unittest()

From b9d4f71c4a1c2a9f6c3593a1990c5becf8377268 Mon Sep 17 00:00:00 2001
From: Helin Wang
Date: Thu, 23 Feb 2017 14:10:23 -0800
Subject: [PATCH 13/17] fix according to comments

---
 python/paddle/reader/creator.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/paddle/reader/creator.py b/python/paddle/reader/creator.py
index 5c840f94b5..5a91bb0b8e 100644
--- a/python/paddle/reader/creator.py
+++ b/python/paddle/reader/creator.py
@@ -17,8 +17,9 @@ __all__ = ['np_array', 'text_file']
 
 def np_array(x):
     """
-    Creates a data reader from numpy array.
-    The highest dimension will be treated as batch dimension to iterate on.
+    Creates a reader that yields elements of x, if it is a
+    numpy vector. Or rows of x, if it is a numpy matrix.
+    Or any sub-hyperplane indexed by the highest dimension.
 
     :param x: the numpy array to create reader from.
     :returns: data reader created from x.

From 38a792f20ed9e65d2920ded6ad42a5b68f2146ee Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Fri, 24 Feb 2017 13:52:31 +0800
Subject: [PATCH 14/17] Clean mnist code

---
 python/paddle/v2/data_set/config.py |  8 ++++
 python/paddle/v2/data_set/mnist.py  | 58 +++++++++++++----------------
 2 files changed, 33 insertions(+), 33 deletions(-)
 create mode 100644 python/paddle/v2/data_set/config.py

diff --git a/python/paddle/v2/data_set/config.py b/python/paddle/v2/data_set/config.py
new file mode 100644
index 0000000000..69e96d65ef
--- /dev/null
+++ b/python/paddle/v2/data_set/config.py
@@ -0,0 +1,8 @@
+import os
+
+__all__ = ['DATA_HOME']
+
+DATA_HOME = os.path.expanduser('~/.cache/paddle_data_set')
+
+if not os.path.exists(DATA_HOME):
+    os.makedirs(DATA_HOME)
diff --git a/python/paddle/v2/data_set/mnist.py b/python/paddle/v2/data_set/mnist.py
index 34f61bb9f6..6f35acf683 100644
--- a/python/paddle/v2/data_set/mnist.py
+++ b/python/paddle/v2/data_set/mnist.py
@@ -1,61 +1,53 @@
 import sklearn.datasets.mldata
 import sklearn.model_selection
 import numpy
+from config import DATA_HOME
 
-__all__ = ['MNISTReader', 'train_reader_creator', 'test_reader_creator']
+__all__ = ['MNIST', 'train_creator', 'test_creator']
 
-DATA_HOME = None
 
+def __mnist_reader_creator__(data, target):
+    def reader():
+        n_samples = data.shape[0]
+        for i in xrange(n_samples):
+            yield (data[i] / 255.0).astype(numpy.float32), int(target[i])
 
-def __mnist_reader__(data, target):
-    n_samples = data.shape[0]
-    for i in xrange(n_samples):
-        yield data[i].astype(numpy.float32), int(target[i])
+    return reader
 
 
-class MNISTReader(object):
+class MNIST(object):
     """
     mnist dataset reader. The `train_reader` and `test_reader` method returns
     a iterator of each sample. Each sample is combined by 784-dim float and a
     one-dim label
     """
 
-    def __init__(self, random_state):
+    def __init__(self, random_state=0, test_size=10000, **options):
         data = sklearn.datasets.mldata.fetch_mldata(
             "MNIST original", data_home=DATA_HOME)
-        n_train = 60000
         self.X_train, self.X_test, self.y_train, self.y_test = sklearn.model_selection.train_test_split(
-            data.data / 255.0,
-            data.target.astype("int"),
-            train_size=n_train,
-            random_state=random_state)
+            data.data,
+            data.target,
+            test_size=test_size,
+            random_state=random_state,
+            **options)
 
-    def train_reader(self):
-        return __mnist_reader__(self.X_train, self.y_train)
+    def train_creator(self):
+        return __mnist_reader_creator__(self.X_train, self.y_train)
 
-    def test_reader(self):
-        return __mnist_reader__(self.X_test, self.y_test)
+    def test_creator(self):
+        return __mnist_reader_creator__(self.X_test, self.y_test)
 
 
-__default_instance__ = MNISTReader(0)
-
-
-def train_reader_creator():
-    """
-    Default train set reader creator.
-    """
-    return __default_instance__.train_reader
-
-
-def test_reader_creator():
-    """
-    Default test set reader creator.
- """ - return __default_instance__.test_reader +__default_instance__ = MNIST() +train_creator = __default_instance__.train_creator +test_creator = __default_instance__.test_creator def unittest(): - assert len(list(train_reader_creator()())) == 60000 + size = 12045 + mnist = MNIST(test_size=size) + assert len(list(mnist.test_creator()())) == size if __name__ == '__main__': From ef9041c07bdf5d5f86b0b5b12045b4cec3719953 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 23 Feb 2017 19:20:33 +0800 Subject: [PATCH 15/17] MNIST dataset reader implementation --- python/paddle/v2/data_set/__init__.py | 0 python/paddle/v2/data_set/mnist.py | 62 +++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 python/paddle/v2/data_set/__init__.py create mode 100644 python/paddle/v2/data_set/mnist.py diff --git a/python/paddle/v2/data_set/__init__.py b/python/paddle/v2/data_set/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/paddle/v2/data_set/mnist.py b/python/paddle/v2/data_set/mnist.py new file mode 100644 index 0000000000..34f61bb9f6 --- /dev/null +++ b/python/paddle/v2/data_set/mnist.py @@ -0,0 +1,62 @@ +import sklearn.datasets.mldata +import sklearn.model_selection +import numpy + +__all__ = ['MNISTReader', 'train_reader_creator', 'test_reader_creator'] + +DATA_HOME = None + + +def __mnist_reader__(data, target): + n_samples = data.shape[0] + for i in xrange(n_samples): + yield data[i].astype(numpy.float32), int(target[i]) + + +class MNISTReader(object): + """ + mnist dataset reader. The `train_reader` and `test_reader` method returns + a iterator of each sample. Each sample is combined by 784-dim float and a + one-dim label + """ + + def __init__(self, random_state): + data = sklearn.datasets.mldata.fetch_mldata( + "MNIST original", data_home=DATA_HOME) + n_train = 60000 + self.X_train, self.X_test, self.y_train, self.y_test = sklearn.model_selection.train_test_split( + data.data / 255.0, + data.target.astype("int"), + train_size=n_train, + random_state=random_state) + + def train_reader(self): + return __mnist_reader__(self.X_train, self.y_train) + + def test_reader(self): + return __mnist_reader__(self.X_test, self.y_test) + + +__default_instance__ = MNISTReader(0) + + +def train_reader_creator(): + """ + Default train set reader creator. + """ + return __default_instance__.train_reader + + +def test_reader_creator(): + """ + Default test set reader creator. 
+ """ + return __default_instance__.test_reader + + +def unittest(): + assert len(list(train_reader_creator()())) == 60000 + + +if __name__ == '__main__': + unittest() From befc3e066b633ae2a9e0c448037a93ede6de4ddf Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 24 Feb 2017 13:52:31 +0800 Subject: [PATCH 16/17] Clean mnist code --- python/paddle/v2/data_set/config.py | 8 ++++ python/paddle/v2/data_set/mnist.py | 58 +++++++++++++---------------- 2 files changed, 33 insertions(+), 33 deletions(-) create mode 100644 python/paddle/v2/data_set/config.py diff --git a/python/paddle/v2/data_set/config.py b/python/paddle/v2/data_set/config.py new file mode 100644 index 0000000000..69e96d65ef --- /dev/null +++ b/python/paddle/v2/data_set/config.py @@ -0,0 +1,8 @@ +import os + +__all__ = ['DATA_HOME'] + +DATA_HOME = os.path.expanduser('~/.cache/paddle_data_set') + +if not os.path.exists(DATA_HOME): + os.makedirs(DATA_HOME) diff --git a/python/paddle/v2/data_set/mnist.py b/python/paddle/v2/data_set/mnist.py index 34f61bb9f6..6f35acf683 100644 --- a/python/paddle/v2/data_set/mnist.py +++ b/python/paddle/v2/data_set/mnist.py @@ -1,61 +1,53 @@ import sklearn.datasets.mldata import sklearn.model_selection import numpy +from config import DATA_HOME -__all__ = ['MNISTReader', 'train_reader_creator', 'test_reader_creator'] +__all__ = ['MNIST', 'train_creator', 'test_creator'] -DATA_HOME = None +def __mnist_reader_creator__(data, target): + def reader(): + n_samples = data.shape[0] + for i in xrange(n_samples): + yield (data[i] / 255.0).astype(numpy.float32), int(target[i]) -def __mnist_reader__(data, target): - n_samples = data.shape[0] - for i in xrange(n_samples): - yield data[i].astype(numpy.float32), int(target[i]) + return reader -class MNISTReader(object): +class MNIST(object): """ mnist dataset reader. The `train_reader` and `test_reader` method returns a iterator of each sample. Each sample is combined by 784-dim float and a one-dim label """ - def __init__(self, random_state): + def __init__(self, random_state=0, test_size=10000, **options): data = sklearn.datasets.mldata.fetch_mldata( "MNIST original", data_home=DATA_HOME) - n_train = 60000 self.X_train, self.X_test, self.y_train, self.y_test = sklearn.model_selection.train_test_split( - data.data / 255.0, - data.target.astype("int"), - train_size=n_train, - random_state=random_state) + data.data, + data.target, + test_size=test_size, + random_state=random_state, + **options) - def train_reader(self): - return __mnist_reader__(self.X_train, self.y_train) + def train_creator(self): + return __mnist_reader_creator__(self.X_train, self.y_train) - def test_reader(self): - return __mnist_reader__(self.X_test, self.y_test) + def test_creator(self): + return __mnist_reader_creator__(self.X_test, self.y_test) -__default_instance__ = MNISTReader(0) - - -def train_reader_creator(): - """ - Default train set reader creator. - """ - return __default_instance__.train_reader - - -def test_reader_creator(): - """ - Default test set reader creator. 
- """ - return __default_instance__.test_reader +__default_instance__ = MNIST() +train_creator = __default_instance__.train_creator +test_creator = __default_instance__.test_creator def unittest(): - assert len(list(train_reader_creator()())) == 60000 + size = 12045 + mnist = MNIST(test_size=size) + assert len(list(mnist.test_creator()())) == size if __name__ == '__main__': From a6028d79dcaba69f6f95c7ebf9c12c33ad42b82e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 27 Feb 2017 10:39:17 +0800 Subject: [PATCH 17/17] Clean mnist reader --- python/paddle/v2/data_set/mnist.py | 35 +++++++++--------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/python/paddle/v2/data_set/mnist.py b/python/paddle/v2/data_set/mnist.py index 6f35acf683..4b392af400 100644 --- a/python/paddle/v2/data_set/mnist.py +++ b/python/paddle/v2/data_set/mnist.py @@ -15,39 +15,24 @@ def __mnist_reader_creator__(data, target): return reader -class MNIST(object): - """ - mnist dataset reader. The `train_reader` and `test_reader` method returns - a iterator of each sample. Each sample is combined by 784-dim float and a - one-dim label - """ +TEST_SIZE = 10000 - def __init__(self, random_state=0, test_size=10000, **options): - data = sklearn.datasets.mldata.fetch_mldata( - "MNIST original", data_home=DATA_HOME) - self.X_train, self.X_test, self.y_train, self.y_test = sklearn.model_selection.train_test_split( - data.data, - data.target, - test_size=test_size, - random_state=random_state, - **options) +data = sklearn.datasets.mldata.fetch_mldata( + "MNIST original", data_home=DATA_HOME) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + data.data, data.target, test_size=TEST_SIZE, random_state=0) - def train_creator(self): - return __mnist_reader_creator__(self.X_train, self.y_train) - def test_creator(self): - return __mnist_reader_creator__(self.X_test, self.y_test) +def train_creator(): + return __mnist_reader_creator__(X_train, y_train) -__default_instance__ = MNIST() -train_creator = __default_instance__.train_creator -test_creator = __default_instance__.test_creator +def test_creator(): + return __mnist_reader_creator__(X_test, y_test) def unittest(): - size = 12045 - mnist = MNIST(test_size=size) - assert len(list(mnist.test_creator()())) == size + assert len(list(test_creator()())) == TEST_SIZE if __name__ == '__main__':