From 0446b488fd3db0b3e1b3c03a9a653a2843bdefca Mon Sep 17 00:00:00 2001
From: liaogang
Date: Thu, 16 Feb 2017 20:46:10 +0800
Subject: [PATCH 01/35] LayerOutput for single machine multiple devices

---
 .../gradientmachines/GradientMachine.h        |  2 +
 .../gradientmachines/MultiGradientMachine.cpp | 38 +++++++++++++++++++
 .../gradientmachines/MultiGradientMachine.h   |  2 +
 .../gradientmachines/NeuralNetwork.cpp        |  1 +
 .../gserver/gradientmachines/NeuralNetwork.h  |  3 +-
 5 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index 0829968d87..201b65bc45 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -134,6 +134,8 @@ public:
     backward(callback);
   }
 
+  virtual MatrixPtr getLayerOutput(const std::string& layerName) = 0;
+
   // see comment in Layer.h for the function with the same name
   virtual void resetState() {}
 
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 80f223824d..a571b3d72f 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -282,6 +282,44 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
   backwardImp(callback);
 }
 
+MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
+  // neural networks are same in each trainer thread
+  // layer output height = height of layer output * thread nums
+  auto nn = dynamic_cast<NeuralNetwork*>(threads_[0]->getGradientMachine());
+  auto height = nn->getLayerOutput(layerName)->getHeight() * threads_.size();
+  auto stream = HPPL_STREAM_DEFAULT;
+
+  auto copyLayerOutput = [height, stream](
+      MatrixPtr& dst, MatrixPtr src, int startRow, bool useGpu) {
+    size_t width = src->getWidth();
+    if (!dst) {
+      dst = src->clone(height, width, useGpu);
+    } else {
+      dst->resize(height, width);
+    }
+
+    MatrixPtr tmpMatrix = dst->subMatrix(startRow, src->getHeight());
+    tmpMatrix->copyFrom(*src, stream);
+  };
+
+  MatrixPtr mats;
+  size_t startRow = 0;
+
+  // copy one layer output from one trainer thread at each time
+  for (auto& thread : threads_) {
+    auto nn = dynamic_cast<NeuralNetwork*>(thread->getGradientMachine());
+    auto mat = nn->getLayerOutput(layerName);
+    copyLayerOutput(mats, mat, startRow, useGpu_);
+    startRow += mat->getHeight();
+  }
+
+  if (useGpu_) {
+    hl_stream_synchronize(HPPL_STREAM_DEFAULT);
+  }
+
+  return mats;
+}
+
 void MultiGradientMachine::backwardImp(const UpdateCallback& callback) {
   for (size_t i = 0; i < parameters_.size(); i++) {
     if (!parameters_[i]->useGpu() || parameters_[i]->isStatic()) continue;
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h
index 9be15ef4bc..988d509817 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.h
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h
@@ -189,6 +189,8 @@ public:
                                PassType passType,
                                const UpdateCallback& callback);
 
+  virtual MatrixPtr getLayerOutput(const std::string& layerName);
+
   virtual void onPassEnd();
 
   virtual void finish();
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 22051e07ee..1f9ace4f67 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -298,6 +298,7 @@ MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) {
   CHECK(it != layerMap_.end()) << "Cannot find layer: " << layerName;
   return it->second->getOutputValue();
 }
+
 void NeuralNetwork::onPassEnd() {
   for (auto& layer : layers_) {
     layer->onPassEnd();
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h
index 25af4abcf8..bf9ed09327 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.h
@@ -87,7 +87,8 @@ public:
 
   virtual void backward(const UpdateCallback& callback = nullptr);
 
-  MatrixPtr getLayerOutput(const std::string& layerName);
+  virtual MatrixPtr getLayerOutput(const std::string& layerName);
+
   const LayerPtr& getLayer(const std::string& layerName) const {
     auto it = layerMap_.find(layerName);
     CHECK(it != layerMap_.end()) << "Unknown layer " << layerName;

From 7c5fd231063908e1d7699c995d1acebb2d321aa9 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Fri, 17 Feb 2017 13:08:39 +0800
Subject: [PATCH 02/35] Update MultiGradientMachine::getLayerOutput

---
 .../gradientmachines/MultiGradientMachine.cpp | 41 ++++++++-----------
 paddle/gserver/layers/CosSimLayer.cpp         |  2 +-
 paddle/math/tests/test_RowBuffer.cpp          |  8 ++--
 3 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index a571b3d72f..56b1836e41 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -283,41 +283,34 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
 }
 
 MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
-  // neural networks are same in each trainer thread
-  // layer output height = height of layer output * thread nums
-  auto nn = dynamic_cast<NeuralNetwork*>(threads_[0]->getGradientMachine());
-  auto height = nn->getLayerOutput(layerName)->getHeight() * threads_.size();
-  auto stream = HPPL_STREAM_DEFAULT;
-
-  auto copyLayerOutput = [height, stream](
-      MatrixPtr& dst, MatrixPtr src, int startRow, bool useGpu) {
-    size_t width = src->getWidth();
-    if (!dst) {
-      dst = src->clone(height, width, useGpu);
-    } else {
-      dst->resize(height, width);
-    }
+  // each thread has the same neuro network
+  auto nn = threads_[0]->getGradientMachine();
 
-    MatrixPtr tmpMatrix = dst->subMatrix(startRow, src->getHeight());
-    tmpMatrix->copyFrom(*src, stream);
-  };
+  size_t height = 0;
+  size_t width = nn->getLayerOutput(layerName)->getWidth();
+  for (auto& thread : threads_) {
+    auto out = thread->getGradientMachine()->getLayerOutput(layerName);
+    height += out->getHeight();
+    CHECK_EQ(width, out->getWidth());
+  }
 
-  MatrixPtr mats;
-  size_t startRow = 0;
+  MatrixPtr dst;
+  Matrix::resizeOrCreate(dst, height, width, false, useGpu_);
 
   // copy one layer output from one trainer thread at each time
-  for (auto& thread : threads_) {
-    auto nn = dynamic_cast<NeuralNetwork*>(thread->getGradientMachine());
-    auto mat = nn->getLayerOutput(layerName);
-    copyLayerOutput(mats, mat, startRow, useGpu_);
-    startRow += mat->getHeight();
+  size_t startRow = 0;
+  for (auto& thread : threads_) {
+    auto src = thread->getGradientMachine()->getLayerOutput(layerName);
+    auto tmpMatrix = dst->subMatrix(startRow, src->getHeight());
+    tmpMatrix->copyFrom(*src, HPPL_STREAM_DEFAULT);
+    startRow += src->getHeight();
   }
 
   if (useGpu_) {
     hl_stream_synchronize(HPPL_STREAM_DEFAULT);
   }
 
-  return mats;
+  return dst;
 }
 
 void MultiGradientMachine::backwardImp(const UpdateCallback& callback) {
   for (size_t i = 0; i < parameters_.size(); i++) {
     if (!parameters_[i]->useGpu() || parameters_[i]->isStatic()) continue;
diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/gserver/layers/CosSimLayer.cpp
index a6c0300acf..1501c74370 100644
--- a/paddle/gserver/layers/CosSimLayer.cpp
+++ b/paddle/gserver/layers/CosSimLayer.cpp
@@ -42,7 +42,7 @@ void CosSimLayer::forward(PassType passType) {
   /* malloc memory for the output_ if necessary */
   int batchSize = getInputValue(0)->getHeight();
   int size = getSize();
-  CHECK_EQ(forward_.size(), 1) << "Only one forward function needed";
+  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";
 
   {
     REGISTER_TIMER_INFO("CosFwResetTimer", getName().c_str());
diff --git a/paddle/math/tests/test_RowBuffer.cpp b/paddle/math/tests/test_RowBuffer.cpp
index 5f66f22ef7..8cc4c69a1a 100644
--- a/paddle/math/tests/test_RowBuffer.cpp
+++ b/paddle/math/tests/test_RowBuffer.cpp
@@ -17,10 +17,10 @@ limitations under the License. */
 
 TEST(RowBuffer, testAutoGrow) {
   paddle::RowBuffer buf(128);
-  ASSERT_EQ(128, buf.getWidth());
+  ASSERT_EQ(128UL, buf.getWidth());
   ASSERT_TRUE(buf.isAutoGrowth());
   buf.resize(2);
-  ASSERT_EQ(2, buf.getRowCount());
+  ASSERT_EQ(2UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getWidth() * 2; ++i) {
     buf.data()[i] = i;
   }
@@ -35,7 +35,7 @@ TEST(RowBuffer, testAutoGrow) {
     data[i] = i;
   }
 
-  ASSERT_EQ(3, buf.getRowCount());
+  ASSERT_EQ(3UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getRowCount() - 1; ++i) {
     for (size_t j = 0; j < buf.getWidth(); ++j) {
       ASSERT_NEAR(i * buf.getWidth() + j, buf.get(i)[j], 1e-5);
@@ -51,7 +51,7 @@ TEST(RowBuffer, testWithMemBuf) {
       std::make_shared<paddle::CpuMemoryHandle>(128 * 2 * sizeof(real));
   paddle::RowBuffer buf(mem, 128);
   ASSERT_TRUE(!buf.isAutoGrowth());
-  ASSERT_EQ(2, buf.getRowCount());
+  ASSERT_EQ(2UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getWidth() * 2; ++i) {
     buf.data()[i] = i;
   }

From 258e5ec59f28f617397646edbf67d4a576f0d3f0 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Fri, 17 Feb 2017 13:23:38 +0800
Subject: [PATCH 03/35] update GradientMachine API

---
 paddle/api/GradientMachine.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp
index 66115f8293..a44763bfa5 100644
--- a/paddle/api/GradientMachine.cpp
+++ b/paddle/api/GradientMachine.cpp
@@ -146,7 +146,7 @@ void GradientMachine::randParameters() { m->machine->randParameters(); }
 
 Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
     throw(UnsupportError) {
-  auto nn = std::dynamic_pointer_cast<NeuralNetwork>(m->machine);
+  auto nn = m->machine;
   if (nn) {
     auto mat = nn->getLayerOutput(layerName);
     return Matrix::createByPaddleMatrixPtr(&mat);

From 3842bc4d7c904b2d0bda4aa48429a20c317f1420 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Fri, 17 Feb 2017 13:42:33 +0800
Subject: [PATCH 04/35] refine code

---
 .../gradientmachines/MultiGradientMachine.cpp | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 56b1836e41..db13a88688 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -285,32 +285,34 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
 MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
   // each thread has the same neuro network
   auto nn = threads_[0]->getGradientMachine();
+
   size_t height = 0;
   size_t width = nn->getLayerOutput(layerName)->getWidth();
+  std::vector<MatrixPtr> mats;
+  mats.reserve(threads_.size());
   for (auto& thread : threads_) {
-    auto out = thread->getGradientMachine()->getLayerOutput(layerName);
+    MatrixPtr out = thread->getGradientMachine()->getLayerOutput(layerName);
+    mats.push_back(out);
     height += out->getHeight();
     CHECK_EQ(width, out->getWidth());
   }
 
-  MatrixPtr dst;
-  Matrix::resizeOrCreate(dst, height, width, false, useGpu_);
+  MatrixPtr layerOutput;
+  Matrix::resizeOrCreate(layerOutput, height, width, false, useGpu_);
 
   // copy one layer output from one trainer thread at each time
   size_t startRow = 0;
-  for (auto& thread : threads_) {
-    auto src = thread->getGradientMachine()->getLayerOutput(layerName);
-    auto tmpMatrix = dst->subMatrix(startRow, src->getHeight());
-    tmpMatrix->copyFrom(*src, HPPL_STREAM_DEFAULT);
-    startRow += src->getHeight();
+
+  for (size_t i = 0; i < threads_.size(); i++) {
+    auto tmpMatrix = layerOutput->subMatrix(startRow, mats[i]->getHeight());
+    tmpMatrix->copyFrom(*mats[i], HPPL_STREAM_DEFAULT);
+    startRow += mats[i]->getHeight();
   }
 
   if (useGpu_) {
     hl_stream_synchronize(HPPL_STREAM_DEFAULT);
   }
 
-  return dst;
+  return layerOutput;
 }
 
 void MultiGradientMachine::backwardImp(const UpdateCallback& callback) {

From 84552872a337b42252233023191698f992aa5808 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Fri, 17 Feb 2017 16:09:50 +0800
Subject: [PATCH 05/35] getLayerOutput in CPU

---
 paddle/gserver/gradientmachines/GradientMachine.h     |  4 +++-
 .../gserver/gradientmachines/MultiGradientMachine.cpp | 10 +++-------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index 201b65bc45..a814e771d1 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -134,7 +134,9 @@ public:
     backward(callback);
   }
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName) = 0;
+  virtual MatrixPtr getLayerOutput(const std::string& layerName) {
+    return nullptr;
+  }
 
   // see comment in Layer.h for the function with the same name
   virtual void resetState() {}
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index db13a88688..7e60920376 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -283,7 +283,7 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
 }
 
 MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
-  // each thread has the same neuro network
+  // each thread has the same neural network
   auto nn = threads_[0]->getGradientMachine();
   size_t height = 0;
   size_t width = nn->getLayerOutput(layerName)->getWidth();
@@ -297,21 +297,17 @@ MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
   }
 
   MatrixPtr layerOutput;
-  Matrix::resizeOrCreate(layerOutput, height, width, false, useGpu_);
+  Matrix::resizeOrCreate(layerOutput, height, width, false, false);
 
   // copy one layer output from one trainer thread at each time
   size_t startRow = 0;
 
   for (size_t i = 0; i < threads_.size(); i++) {
     auto tmpMatrix = layerOutput->subMatrix(startRow, mats[i]->getHeight());
-    tmpMatrix->copyFrom(*mats[i], HPPL_STREAM_DEFAULT);
+    tmpMatrix->copyFrom(*mats[i]);
     startRow += mats[i]->getHeight();
   }
 
-  if (useGpu_) {
-    hl_stream_synchronize(HPPL_STREAM_DEFAULT);
-  }
-
   return layerOutput;
 }

From d2dfa70deb08c3c8b13e2154afed6a3e4ce535d7 Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Fri, 17 Feb 2017 19:34:11 +0800
Subject: [PATCH 06/35] data converter

---
 python/paddle/v2/data_converter.py | 240 +++++++++++++++++++++++++++++
 1 file changed, 240 insertions(+)
 create mode 100644 python/paddle/v2/data_converter.py

diff --git a/python/paddle/v2/data_converter.py b/python/paddle/v2/data_converter.py
new file mode 100644
index 0000000000..5d7b8a736b
--- /dev/null
+++ b/python/paddle/v2/data_converter.py
@@ -0,0 +1,240 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import py_paddle.swig_paddle
+import numpy
+
+__all__ = ['DataConverter']
+
+
+class IDataConverter(object):
+    def __init__(self, input_type, pos):
+        """
+        :param input_type: data type
+        :type input_type: dp2.InputType
+        :param pos: which input, starting from 0
+        :type pos: int
+        """
+        self.input_type = input_type
+        assert isinstance(self.input_type, dp2.InputType)
+        self.pos = pos
+
+    def convert(self, data, argument):
+        """
+        Convert data to paddle format.
+        :param data: input data
+        :param argument: paddle format
+        """
+        pass
+
+
+class DenseConvert(IDataConverter):
+    def __init__(self, input_type, pos):
+        IDataConverter.__init__(self, input_type, pos)
+
+    def convert(self, data, argument):
+        """
+        :param data: input data
+        :type data: list | numpy array
+        :param argument: the argument which paddle accepts
+        :type argument: swig_paddle.Arguments
+        """
+        assert isinstance(argument, swig_paddle.Arguments)
+        if data.dtype != numpy.float32:
+            data = data.astype(numpy.float32)
+        m = swig_paddle.Matrix.createDenseFromNumpy(data, True, False)
+        argument.setSlotValue(self.pos, m)
+
+
+class SparseBinaryConvert(IDataConverter):
+    def __init__(self, input_type, pos):
+        IDataConverter.__init__(self, input_type, pos)
+        self.__rows__ = [0]
+        self.__cols__ = []
+        self.__height__ = 0
+        self.__nnz__ = 0
+        self.__value__ = []
+
+    def fill_csr(self, data):
+        self.__height__ = len(data)
+        for x in data:
+            self.__rows__.append(self.__rows__[-1] + len(x))
+        self__cols__ = data.flatten()
+
+    def convert(self, data, argument):
+        assert isinstance(argument, swig_paddle.Arguments)
+
+        fill_csr(data)
+        m = swig_paddle.Matrix.createSparse(self.__height__,
+                                            self.input_type.dim,
+                                            len(self.__cols__),
+                                            len(self.__value__) == 0)
+        assert isinstance(m, swig_paddle.Matrix)
+        m.sparseCopyFrom(self.__rows__, self.__cols__, self.__value__)
+        argument.setSlotValue(self.pos, m)
+
+
+class SparseFloatConvert(SparseBinaryConvert):
+    def __init__(self, input_type, pos):
+        SparseBinaryConvert.__init__(self, input_type, pos)
+
+    def fill_csr(self, data):
+        self.__height__ = len(data)
+        for x in data:
+            self.__rows__.append(self.__rows__[-1] + len(x))
+        self.__cols__.extend((x[0] for x in data))
+        self.__value__.extend((x[1] for x in data))
+
+
+class IndexConvert(IDataConverter):
+    def __init__(self, input_type, pos):
+        IDataConverter.__init__(self, input_type, pos)
+        self.__ids__ = []
+
+    def convert(self, data, argument):
+        assert isinstance(argument, swig_paddle.Arguments)
+        self.__ids__ = data.flatten()
+        ids = swig_paddle.IVector.create(self.__ids__)
+        argument.setSlotIds(self.pos, ids)
+
+
+class SequenceConvert(IDataConverter):
+    def __init__(self, input_type, pos, inner_convert, setter):
+        """
+        :param input_type: the type of input data
+        :type input_type: dp2.InputType
+        :param pos: the position of this input
+        :type pos: int
+        :param inner_convert: DataConvert type
+        :type inner_convert: DenseConvert|SparseBinaryConvert|
+                             SparseFloatConvert|IndexConvert
+        :param setter: a function which sets the sequence start positions
+                       into the argument
+        :type setter: callable
+        """
+        IDataConverter.__init__(self, input_type, pos)
+        self.__seq__ = [0]
+        self.__inner_convert__ = inner_convert
+        self.__setter__ = setter
+
+    def fill_seq(self, data):
+        for each in data:
+            self.__seq__.append(self.__seq__[-1] + self.get_size(each))
+
+    def convert(self, data, argument):
+        self.fill_seq(data)
+        seq = swig_paddle.IVector.create(self.__seq__, False)
+        self.__setter__(argument, self.pos, seq)
+
+        dat = []
+        for each in data:
+            dat.append(each)
+        self.__inner_convert__.convert(dat, argument)
+
+    def get_size(self, data):
+        if isinstance(self.__inner_convert__, SequenceConvert):
+            return sum(self.__inner_convert__.get_size(item) for item in data)
+        else:
+            return len(data)
+
+
+class DataConverter(object):
+    def __init__(self, input_mapper):
+        """
+        Usage:
+
+        .. code-block:: python
+            inputs = [('image', dense_vector), ('label', integer_value)]
+            cvt = DataConverter(inputs)
+            arg = cvt.convert(minibatch_data, {'image':0, 'label':1})
+
+        :param input_mapper: list of (input_name, input_type)
+        :type input_mapper: list
+        """
+        assert isinstance(self.input_types, collections.Sequence)
+        self.input_names = []
+        self.input_types = []
+        for each in self.input_types:
+            self.input_names.append(each[0])
+            self.input_types.append(each[1])
+            assert isinstance(each[1], dp2.InputType)
+
+    def convert(self, data, input_dict=None, argument=None):
+        """
+        Convert minibatch data to Paddle's argument. The data is numpy array
+        or list.
+
+        :param data: input samples, for example, [column0, column1, ...] or
+                     (column0, column1, ...) each column is one minibatch
+                     feature. Note, if only one column feature, data also
+                     should be a list or tuple, [column0] or (column0).
+        :type data: list|tuple
+        :param input_dict: a dictionary to specify the correspondence
+                           of data_layer and input data. If None,
+                           the feature order in argument and data is the same.
+        :type input_dict: dict, like {string:integer, string, integer, ...}|None
+        :param argument: converted data will be saved in this argument. If None,
+                         it will create a swig_paddle.Arguments firstly.
+        :param type: swig_paddle.Arguments|None
+        """
+        if argument is None:
+            argument = swig_paddle.Arguments.createArguments(0)
+        assert isinstance(argument, swig_paddle.Arguments)
+        argument.resize(len(self.input_types))
+
+        converts = [
+            DataConverter.create_scanner(i, each_type)
+            for i, each_type in enumerate(self.input_types)
+        ]
+
+        for i, cvt in enumerate(converts):
+            if input_dict is not None:
+                dat = data[input_dict[self.input_names[i]]]
+            else:
+                dat = data[i]
+            cvt.convert(dat, argument)
+
+        return argument
+
+    def __call__(self, dat, argument=None):
+        return self.convert(dat, argument)
+
+    @staticmethod
+    def create_scanner(pos, each):
+        assert isinstance(each, dp2.InputType)
+        retv = None
+        if each.type == dp2.DataType.Dense:
+            retv = DenseConvert(each, pos)
+        elif each.type == dp2.DataType.Index:
+            retv = IndexConvert(each, pos)
+        elif each.type == dp2.DataType.SparseNonValue:
+            retv = SparseBinaryConvert(each, pos)
+        elif each.type == dp2.DataType.SparseValue:
+            retv = SparseFloatConvert(each, pos)
+        assert retv is not None
+
+        if each.seq_type == dp2.SequenceType.SUB_SEQUENCE:
+            retv = SequenceConvert(
+                each, pos, retv,
+                lambda arg, pos, seq: arg.setSlotSubSequenceStartPositions(pos, seq)
+            )
+
+        if each.seq_type in [
+                dp2.SequenceType.SUB_SEQUENCE, dp2.SequenceType.SEQUENCE
+        ]:
+            retv = SequenceConvert(
+                each, pos, retv,
+                lambda arg, pos, seq: arg.setSlotSequenceStartPositions(pos, seq)
+            )
+        return retv

From 733da9b9e62fb20a5adfe12f23834b7fa184dd63 Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Fri, 17 Feb 2017 19:45:52 +0800
Subject: [PATCH 07/35] data converter

---
 python/paddle/v2/data_converter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/paddle/v2/data_converter.py b/python/paddle/v2/data_converter.py
index 5d7b8a736b..45114b407d 100644
--- a/python/paddle/v2/data_converter.py
+++ b/python/paddle/v2/data_converter.py
@@ -15,6 +15,7 @@
 import collections
 import py_paddle.swig_paddle
 import numpy
+import paddle.trainer.PyDataProvider2 as dp2
 
 __all__ = ['DataConverter']

From e6232d82e1650dc2186fa39c93a06a7ef276fc52 Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Sun, 19 Feb 2017 18:22:33 +0800
Subject: [PATCH 08/35] testing in mnist

---
 python/paddle/v2/__init__.py       |  3 +-
 python/paddle/v2/data_converter.py | 50 ++++++++++++++----------------
 2 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index bc064a21ae..4ecd0dafd6 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -20,7 +20,8 @@ import event
 import py_paddle.swig_paddle as api
 
 __all__ = [
-    'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'event'
+    'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
+    'event', 'data_converter'
 ]

diff --git a/python/paddle/v2/data_converter.py b/python/paddle/v2/data_converter.py
index 45114b407d..afb98a77c5 100644
--- a/python/paddle/v2/data_converter.py
+++ b/python/paddle/v2/data_converter.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 
 import collections
-import py_paddle.swig_paddle
-import numpy
+import py_paddle.swig_paddle as api
+import numpy as np
 import paddle.trainer.PyDataProvider2 as dp2
 
 __all__ = ['DataConverter']
@@ -50,12 +50,12 @@ class DenseConvert(IDataConverter):
         :param data: input data
         :type data: list | numpy array
         :param argument: the argument which paddle accepts
-        :type argument: swig_paddle.Arguments
+        :type argument: Paddle's Arguments
         """
-        assert isinstance(argument, swig_paddle.Arguments)
-        if data.dtype != numpy.float32:
-            data = data.astype(numpy.float32)
-        m = swig_paddle.Matrix.createDenseFromNumpy(data, True, False)
+        assert isinstance(argument, api.Arguments)
+        if data.dtype != np.float32:
+            data = data.astype(np.float32)
+        m = api.Matrix.createDenseFromNumpy(data, True, False)
         argument.setSlotValue(self.pos, m)
@@ -72,17 +72,16 @@ class SparseBinaryConvert(IDataConverter):
         self.__height__ = len(data)
         for x in data:
             self.__rows__.append(self.__rows__[-1] + len(x))
-        self__cols__ = data.flatten()
+        self.__cols__ = data.flatten()
 
     def convert(self, data, argument):
-        assert isinstance(argument, swig_paddle.Arguments)
+        assert isinstance(argument, api.Arguments)
 
         fill_csr(data)
-        m = swig_paddle.Matrix.createSparse(self.__height__,
-                                            self.input_type.dim,
-                                            len(self.__cols__),
-                                            len(self.__value__) == 0)
-        assert isinstance(m, swig_paddle.Matrix)
+        m = api.Matrix.createSparse(self.__height__, self.input_type.dim,
+                                    len(self.__cols__),
+                                    len(self.__value__) == 0)
+        assert isinstance(m, api.Matrix)
         m.sparseCopyFrom(self.__rows__, self.__cols__, self.__value__)
         argument.setSlotValue(self.pos, m)
@@ -94,8 +93,8 @@ class SparseFloatConvert(SparseBinaryConvert):
         self.__height__ = len(data)
         for x in data:
             self.__rows__.append(self.__rows__[-1] + len(x))
-        self.__cols__.extend((x[0] for x in data))
-        self.__value__.extend((x[1] for x in data))
+        self.__cols__.extend(x[0])
+        self.__value__.extend(x[1])
@@ -105,7 +104,7 @@ class IndexConvert(IDataConverter):
 
     def convert(self, data, argument):
-        assert isinstance(argument, swig_paddle.Arguments)
+        assert isinstance(argument, api.Arguments)
         self.__ids__ = data.flatten()
-        ids = swig_paddle.IVector.create(self.__ids__)
+        ids = api.IVector.create(self.__ids__)
         argument.setSlotIds(self.pos, ids)
@@ -135,7 +134,7 @@ class SequenceConvert(IDataConverter):
     def convert(self, data, argument):
         self.fill_seq(data)
-        seq = swig_paddle.IVector.create(self.__seq__, False)
+        seq = api.IVector.create(self.__seq__, False)
         self.__setter__(argument, self.pos, seq)
@@ -153,21 +150,21 @@ class SequenceConvert(IDataConverter):
 
 class DataConverter(object):
-    def __init__(self, input_mapper):
+    def __init__(self, input):
         """
         Usage:
 
         .. code-block:: python
             inputs = [('image', dense_vector), ('label', integer_value)]
             cvt = DataConverter(inputs)
-            arg = cvt.convert(minibatch_data, {'image':0, 'label':1})
+            arg = cvt(minibatch_data, {'image':0, 'label':1})
 
         :param input_mapper: list of (input_name, input_type)
         :type input_mapper: list
         """
-        assert isinstance(self.input_types, collections.Sequence)
         self.input_names = []
         self.input_types = []
-        for each in self.input_types:
+        for each in input:
             self.input_names.append(each[0])
             self.input_types.append(each[1])
             assert isinstance(each[1], dp2.InputType)
@@ -186,16 +184,16 @@ class DataConverter(object):
                            the feature order in argument and data is the same.
         :type input_dict: dict, like {string:integer, string, integer, ...}|None
         :param argument: converted data will be saved in this argument. If None,
-                         it will create a swig_paddle.Arguments firstly.
+                         it will create a Paddle's Arguments firstly.
         :param type: swig_paddle.Arguments|None
         """
         if argument is None:
-            argument = swig_paddle.Arguments.createArguments(0)
-        assert isinstance(argument, swig_paddle.Arguments)
+            argument = api.Arguments.createArguments(0)
+        assert isinstance(argument, api.Arguments)
         argument.resize(len(self.input_types))
 
         converts = [
             DataConverter.create_scanner(i, each_type)
             for i, each_type in enumerate(self.input_types)
         ]

From f846e8fec2e9c8d16e24e12d6b6b7efc77d3cc83 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Mon, 20 Feb 2017 15:13:43 +0800
Subject: [PATCH 09/35] Add const for getLayerOutput

---
 paddle/gserver/gradientmachines/GradientMachine.h      |  2 +-
 .../gradientmachines/MultiGradientMachine.cpp          | 12 ++++++------
 .../gserver/gradientmachines/MultiGradientMachine.h    |  2 +-
 paddle/gserver/gradientmachines/NeuralNetwork.cpp      |  2 +-
 paddle/gserver/gradientmachines/NeuralNetwork.h        |  2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index a814e771d1..5469c0d89f 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -134,7 +134,7 @@ public:
     backward(callback);
   }
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName) {
+  virtual MatrixPtr getLayerOutput(const std::string& layerName) const {
     return nullptr;
   }
 
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 7e60920376..2d42e64830 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -282,7 +282,8 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
   backwardImp(callback);
 }
 
-MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
+MatrixPtr MultiGradientMachine::getLayerOutput(
+    const std::string& layerName) const {
   // each thread has the same neural network
   auto nn = threads_[0]->getGradientMachine();
   size_t height = 0;
@@ -301,11 +302,10 @@ MatrixPtr MultiGradientMachine::getLayerOutput(const std::string& layerName) {
 
   // copy one layer output from one trainer thread at each time
   size_t startRow = 0;
-
-  for (size_t i = 0; i < threads_.size(); i++) {
-    auto tmpMatrix = layerOutput->subMatrix(startRow, mats[i]->getHeight());
-    tmpMatrix->copyFrom(*mats[i]);
-    startRow += mats[i]->getHeight();
+  for (auto& mat : mats) {
+    auto tmpMatrix = layerOutput->subMatrix(startRow, mat->getHeight());
+    tmpMatrix->copyFrom(*mat);
+    startRow += mat->getHeight();
   }
 
   return layerOutput;
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h
index 988d509817..a1a2d41706 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.h
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h
@@ -189,7 +189,7 @@ public:
                                PassType passType,
                                const UpdateCallback& callback);
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName);
+  virtual MatrixPtr getLayerOutput(const std::string& layerName) const;
 
   virtual void onPassEnd();
 
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 1f9ace4f67..00887c81d4 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -293,7 +293,7 @@ void NeuralNetwork::backward(const UpdateCallback& callback) {
   }
 }
 
-MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) {
+MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) const {
   auto it = layerMap_.find(layerName);
   CHECK(it != layerMap_.end()) << "Cannot find layer: " << layerName;
   return it->second->getOutputValue();
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h
index bf9ed09327..6ecc251a40 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.h
@@ -87,7 +87,7 @@ public:
 
   virtual void backward(const UpdateCallback& callback = nullptr);
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName);
+  virtual MatrixPtr getLayerOutput(const std::string& layerName) const;
 
   const LayerPtr& getLayer(const std::string& layerName) const {
     auto it = layerMap_.find(layerName);

From 67b8150ff4d04552a5a52cb099bf7e935765e69f Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Tue, 21 Feb 2017 13:27:21 +0800
Subject: [PATCH 10/35] data converter test

---
 paddle/data_converter_test.py           | 92 +++++++++++++++++++++++++
 python/paddle/v2/data_converter.py      | 19 ++---
 python/paddle/v2/data_converter_test.py | 92 +++++++++++++++++++++++++
 3 files changed, 195 insertions(+), 8 deletions(-)
 create mode 100644 paddle/data_converter_test.py
 create mode 100644 python/paddle/v2/data_converter_test.py

diff --git a/paddle/data_converter_test.py b/paddle/data_converter_test.py
new file mode 100644
index 0000000000..d84ee51727
--- /dev/null
+++ b/paddle/data_converter_test.py
@@ -0,0 +1,92 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import py_paddle.swig_paddle as api
+import numpy as np
+import paddle.trainer.PyDataProvider2 as dp2
+
+from paddle.v2.data_converter import DataConverter
+
+
+class DataConverterTest(unittest.TestCase):
+    def dense_reader(self, shape):
+        data = np.random.random(shape)
+        return data
+
+    def sparse_binary_reader(self,
+                             high,
+                             size_limit,
+                             batch_size,
+                             non_empty=False):
+        data = []
+        for i in xrange(batch_size):
+            num = np.random.randint(size_limit)  # num could be 0
+            while non_empty and num == 0:
+                num = np.random.randint(size_limit)
+            data.append(np.random.randint(high, size=num).tolist())
+
+        return data
+
+    def test_dense_vector(self):
+        def compare(input):
+            converter = DataConverter([('image', dp2.dense_vector(784))])
+            arg = converter([input], {'image': 0})
+            output = arg.getSlotValue(0).copyToNumpyMat()
+            input = np.array(input, dtype='float32')
+            self.assertAlmostEqual(input.all(), output.all())
+
+        # test numpy array
+        data = self.dense_reader(shape=[32, 784])
+        compare(data)
+
+        # test list
+        compare(data.tolist())
+
+    #def test_sparse_binary(self):
+    #    dim = 100000
+    #    data = self.sparse_binary_reader(dim, 5, 2)
+    #    converter = DataConverter([('input', dp2.sparse_binary_vector(dim))])
+    #    arg = converter([data], {'input':0})
+    #    output = arg.getSlotValue(0)
+
+    #def test_sparse(self):
+    #    dim = 100000
+    #    v = self.sparse_binary_reader(dim, 5, 2)
+    #    w = []
+    #    for dat in data:
+    #        x = self.dense_reader(shape=[1, len(dat)])
+    #        w.append(x.tolist())
+    #    data = []
+    #    for each in zip(v, w):
+    #        data.append(zip(each[0], each[1]))
+    #
+    #    converter = DataConverter([('input', dp2.sparse_binary_vector(dim))])
+    #    arg = converter([data], {'input':0})
+    #    output = arg.getSlotValue(0)
+
+    def test_integer(self):
+        dim = 100
+        index = np.random.randint(dim, size=32)
+        print index
+        converter = DataConverter([('input', dp2.integer_value(dim))])
+        arg = converter([index], {'input': 0})
+        print arg.getSlotValue(0)
+        output = arg.getSlotValue(0).copyToNumpyArray()
+        print 'output=', output
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/data_converter.py b/python/paddle/v2/data_converter.py
index afb98a77c5..fcba43e4ba 100644
--- a/python/paddle/v2/data_converter.py
+++ b/python/paddle/v2/data_converter.py
@@ -53,9 +53,9 @@ class DenseConvert(IDataConverter):
         :type argument: Paddle's Arguments
         """
         assert isinstance(argument, api.Arguments)
-        if data.dtype != np.float32:
-            data = data.astype(np.float32)
-        m = api.Matrix.createDenseFromNumpy(data, True, False)
+        # TODO: handle data type (float, double, ...)
+        data = np.array(data, np.float32)
+        m = api.Matrix.createDenseFromNumpy(data)
         argument.setSlotValue(self.pos, m)
@@ -72,12 +72,12 @@ class SparseBinaryConvert(IDataConverter):
         self.__height__ = len(data)
         for x in data:
             self.__rows__.append(self.__rows__[-1] + len(x))
-            self.__cols__ = data.flatten()
+            self.__cols__.extend(x)
 
     def convert(self, data, argument):
         assert isinstance(argument, api.Arguments)
 
-        fill_csr(data)
+        self.fill_csr(data)
         m = api.Matrix.createSparse(self.__height__, self.input_type.dim,
                                     len(self.__cols__),
                                     len(self.__value__) == 0)
@@ -94,8 +94,8 @@ class SparseFloatConvert(SparseBinaryConvert):
         self.__height__ = len(data)
         for x in data:
             self.__rows__.append(self.__rows__[-1] + len(x))
-            self.__cols__.extend(x[0])
-            self.__value__.extend(x[1])
+            self.__cols__.extend(x[0])
+            self.__value__.extend(x[1])
@@ -105,7 +105,10 @@ class IndexConvert(IDataConverter):
 
     def convert(self, data, argument):
         assert isinstance(argument, api.Arguments)
-        self.__ids__ = data.flatten()
+        #for x in data:
+        #    self.__ids__.append(x)
+        self.__ids__.extend(data)
+
         ids = api.IVector.create(self.__ids__)
         argument.setSlotIds(self.pos, ids)
diff --git a/python/paddle/v2/data_converter_test.py b/python/paddle/v2/data_converter_test.py
new file mode 100644
index 0000000000..d84ee51727
--- /dev/null
+++ b/python/paddle/v2/data_converter_test.py
@@ -0,0 +1,92 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import py_paddle.swig_paddle as api
+import numpy as np
+import paddle.trainer.PyDataProvider2 as dp2
+
+from paddle.v2.data_converter import DataConverter
+
+
+class DataConverterTest(unittest.TestCase):
+    def dense_reader(self, shape):
+        data = np.random.random(shape)
+        return data
+
+    def sparse_binary_reader(self,
+                             high,
+                             size_limit,
+                             batch_size,
+                             non_empty=False):
+        data = []
+        for i in xrange(batch_size):
+            num = np.random.randint(size_limit)  # num could be 0
+            while non_empty and num == 0:
+                num = np.random.randint(size_limit)
+            data.append(np.random.randint(high, size=num).tolist())
+
+        return data
+
+    def test_dense_vector(self):
+        def compare(input):
+            converter = DataConverter([('image', dp2.dense_vector(784))])
+            arg = converter([input], {'image': 0})
+            output = arg.getSlotValue(0).copyToNumpyMat()
+            input = np.array(input, dtype='float32')
+            self.assertAlmostEqual(input.all(), output.all())
+
+        # test numpy array
+        data = self.dense_reader(shape=[32, 784])
+        compare(data)
+
+        # test list
+        compare(data.tolist())
+
+    #def test_sparse_binary(self):
+    #    dim = 100000
+    #    data = self.sparse_binary_reader(dim, 5, 2)
+    #    converter = DataConverter([('input', dp2.sparse_binary_vector(dim))])
+    #    arg = converter([data], {'input':0})
+    #    output = arg.getSlotValue(0)
+
+    #def test_sparse(self):
+    #    dim = 100000
+    #    v = self.sparse_binary_reader(dim, 5, 2)
+    #    w = []
+    #    for dat in data:
+    #        x = self.dense_reader(shape=[1, len(dat)])
+    #        w.append(x.tolist())
+    #    data = []
+    #    for each in zip(v, w):
+    #        data.append(zip(each[0], each[1]))
+    #
+    #    converter = DataConverter([('input', dp2.sparse_binary_vector(dim))])
+    #    arg = converter([data], {'input':0})
+    #    output = arg.getSlotValue(0)
+
+    def test_integer(self):
+        dim = 100
+        index = np.random.randint(dim, size=32)
+        print index
+        converter = DataConverter([('input', dp2.integer_value(dim))])
+        arg = converter([index], {'input': 0})
+        print arg.getSlotValue(0)
+        output = arg.getSlotValue(0).copyToNumpyArray()
+        print 'output=', output
+
+
+if __name__ == '__main__':
+    unittest.main()

From 15180e85acaa400c629a37fadcf4589b7c086c7d Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Tue, 21 Feb 2017 15:50:01 +0800
Subject: [PATCH 11/35] remove some code

---
 paddle/data_converter_test.py | 92 -----------------------------------
 1 file changed, 92 deletions(-)
 delete mode 100644 paddle/data_converter_test.py

diff --git a/paddle/data_converter_test.py b/paddle/data_converter_test.py
deleted file mode 100644
index d84ee51727..0000000000
--- a/paddle/data_converter_test.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import py_paddle.swig_paddle as api
-import numpy as np
-import paddle.trainer.PyDataProvider2 as dp2
-
-from paddle.v2.data_converter import DataConverter
-
-
-class DataConverterTest(unittest.TestCase):
-    def dense_reader(self, shape):
-        data = np.random.random(shape)
-        return data
-
-    def sparse_binary_reader(self,
-                             high,
-                             size_limit,
-                             batch_size,
-                             non_empty=False):
-        data = []
-        for i in xrange(batch_size):
-            num = np.random.randint(size_limit)  # num could be 0
-            while non_empty and num == 0:
-                num = np.random.randint(size_limit)
-            data.append(np.random.randint(high, size=num).tolist())
-
-        return data
-
-    def test_dense_vector(self):
-        def compare(input):
-            converter = DataConverter([('image', dp2.dense_vector(784))])
-            arg = converter([input], {'image': 0})
-            output = arg.getSlotValue(0).copyToNumpyMat()
-            input = np.array(input, dtype='float32')
-            self.assertAlmostEqual(input.all(), output.all())
-
-        # test numpy array
-        data = self.dense_reader(shape=[32, 784])
-        compare(data)
-
-        # test list
-        compare(data.tolist())
-
-    #def test_sparse_binary(self):
-    #    dim = 100000
-    #    data = self.sparse_binary_reader(dim, 5, 2)
-    #    converter = DataConverter([('input', dp2.sparse_binary_vector(dim))])
-    #    arg = converter([data], {'input':0})
-    #    output = arg.getSlotValue(0)
-
-    #def test_sparse(self):
-    #    dim = 100000
-    #    v = self.sparse_binary_reader(dim, 5, 2)
-    #    w = []
-    #    for dat in data:
-    #        x = self.dense_reader(shape=[1, len(dat)])
-    #        w.append(x.tolist())
-    #    data = []
-    #    for each in zip(v, w):
-    #        data.append(zip(each[0], each[1]))
-    #
-    #    converter = DataConverter([('input', dp2.sparse_binary_vector(dim))])
-    #    arg = converter([data], {'input':0})
-    #    output = arg.getSlotValue(0)
-
-    def test_integer(self):
-        dim = 100
-        index = np.random.randint(dim, size=32)
-        print index
-        converter = DataConverter([('input', dp2.integer_value(dim))])
-        arg = converter([index], {'input': 0})
-        print arg.getSlotValue(0)
-        output = arg.getSlotValue(0).copyToNumpyArray()
-        print 'output=', output
-
-
-if __name__ == '__main__':
-    unittest.main()

From bb625337e37c06cec40954abb83fde8b2716d44a Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Wed, 22 Feb 2017 15:28:02 +0800
Subject: [PATCH 12/35] add some comments

---
 python/paddle/v2/data_feeder.py | 41 +++++++++++++++++++++++++++++++++
 python/paddle/v2/trainer.py     |  1 -
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py
index 2a0b6bbeb5..83a4efef9e 100644
--- a/python/paddle/v2/data_feeder.py
+++ b/python/paddle/v2/data_feeder.py
@@ -15,5 +15,46 @@
 from py_paddle import DataProviderConverter
 
 __all__ = ['DataFeeder']
+"""
+DataFeeder converts the data returned by paddle.reader into a data structure
+of Arguments which is defined in the API. The paddle.reader usually returns
+a list of mini-batch data. Each item in the list is a tuple or list, which is
+one sample with multiple features. DataFeeder converts this mini-batch data
+into Arguments in order to feed it to the C++ interface.
+
+Example usage:
+
+    data_types = [paddle.data_type.dense_vector(784),
+                  paddle.data_type.integer_value(10)]
+    feeder = DataFeeder(input_types=data_types)
+    minibatch_data = [
+        ( [1.0,2.0,3.0,4.0], 5 ),  # first sample
+        ( [1.0,2.0,3.0,4.0], 5 )   # second sample
+    ]
+
+    # or
+    # minibatch_data = [
+    #     [ [1.0,2.0,3.0,4.0], 5 ],  # first sample
+    #     [ [1.0,2.0,3.0,4.0], 5 ]   # second sample
+    # ]
+    arg = feeder(minibatch_data)
+
+
+Args:
+    input_types: A list of input data types. Its length is equal to the number
+                 of features in each sample returned by paddle.reader. Each
+                 item specifies the type of one feature.
+    minibatch_data: A list of mini-batch data. Each item is a list or tuple,
+                    for example:
+                    [
+                        (feature_0, feature_1, feature_2, ...),  # first sample
+                        (feature_0, feature_1, feature_2, ...),  # second sample
+                        ...
+                    ]
+
+Returns:
+    An Arguments object that contains this mini-batch data with multiple
+    features. The Arguments definition is in the API.
+"""
 
 DataFeeder = DataProviderConverter
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
index 7480a3fb84..5709c7e886 100644
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@@ -89,7 +89,6 @@ class SGD(ITrainer):
         event_handler = default_event_handler
 
     topology = v2_layer.parse_network(topology)
-    print topology
 
     __check_train_args__(**locals())

From fbf864362dc1bd716a8db1f4441afe488fe3d74b Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 22 Feb 2017 16:02:58 +0800
Subject: [PATCH 13/35] Update python getLayerOutputs

---
 demo/image_classification/prediction.py       |  2 +-
 demo/model_zoo/resnet/classify.py             |  2 +-
 paddle/api/Arguments.cpp                      |  7 ++++
 paddle/api/GradientMachine.cpp                |  5 ++-
 paddle/api/PaddleAPI.h                        |  5 +--
 paddle/api/Trainer.cpp                        |  8 ++---
 .../gradientmachines/GradientMachine.h        |  4 +--
 .../gradientmachines/MultiGradientMachine.cpp | 32 +++++--------------
 .../gradientmachines/MultiGradientMachine.h   |  4 ++-
 .../gradientmachines/NeuralNetwork.cpp        |  6 ++--
 .../gserver/gradientmachines/NeuralNetwork.h  |  2 +-
 paddle/gserver/layers/CosSimLayer.cpp         |  2 +-
 paddle/py_paddle/util.py                      |  2 +-
 13 files changed, 35 insertions(+), 46 deletions(-)

diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py
index 9a86aafcb2..49c0ff600c 100755
--- a/demo/image_classification/prediction.py
+++ b/demo/image_classification/prediction.py
@@ -126,7 +126,7 @@ class ImageClassifier():
         # For oversampling, average predictions across crops.
         # If not, the shape of output[name]: (1, class_number),
         # the mean is also applicable.
-        return output[output_layer].mean(0)
+        return output[output_layer]['value'].mean(0)
 
     def predict(self, image=None, output_layer=None):
         assert isinstance(image, basestring)
diff --git a/demo/model_zoo/resnet/classify.py b/demo/model_zoo/resnet/classify.py
index 4631816c43..6074cc1d3a 100755
--- a/demo/model_zoo/resnet/classify.py
+++ b/demo/model_zoo/resnet/classify.py
@@ -156,7 +156,7 @@ class ImageClassifier():
             # For oversampling, average predictions across crops.
            # If not, the shape of output[name]: (1, class_number),
            # the mean is also applicable.
-            res[name] = output[name].mean(0)
+            res[name] = output[name]['value'].mean(0)
 
         return res
 
diff --git a/paddle/api/Arguments.cpp b/paddle/api/Arguments.cpp
index 41beed38a8..a3f4bfffc9 100644
--- a/paddle/api/Arguments.cpp
+++ b/paddle/api/Arguments.cpp
@@ -38,6 +38,13 @@ Arguments* Arguments::createByPaddleArgumentVector(void* ptr) {
   return args;
 }
 
+Arguments* Arguments::createByPaddleArgument(const void* ptr) {
+  auto p = (paddle::Argument*)(ptr);
+  auto args = new Arguments();
+  args->m->outputs.push_back(*p);
+  return args;
+}
+
 Matrix* Arguments::getSlotValue(size_t idx) const throw(RangeError) {
   auto& a = m->getArg(idx);
   return Matrix::createByPaddleMatrixPtr(&a.value);
diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp
index a44763bfa5..a64e70a6bd 100644
--- a/paddle/api/GradientMachine.cpp
+++ b/paddle/api/GradientMachine.cpp
@@ -144,12 +144,11 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) {
 
 void GradientMachine::randParameters() { m->machine->randParameters(); }
 
-Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
+Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const
     throw(UnsupportError) {
   auto nn = m->machine;
   if (nn) {
-    auto mat = nn->getLayerOutput(layerName);
-    return Matrix::createByPaddleMatrixPtr(&mat);
+    return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName));
   } else {
     throw UnsupportError();
   }
diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h
index f5af8b0035..10569a7170 100644
--- a/paddle/api/PaddleAPI.h
+++ b/paddle/api/PaddleAPI.h
@@ -454,6 +454,7 @@ public:
 
 private:
   static Arguments* createByPaddleArgumentVector(void* ptr);
+  static Arguments* createByPaddleArgument(const void* ptr);
  void* getInternalArgumentsPtr() const;
 
 private:
@@ -769,7 +770,7 @@ public:
 
  void randParameters();
 
-  Matrix* getLayerOutput(const std::string& layerName) const
+  Arguments* getLayerOutput(const std::string& layerName) const
       throw(UnsupportError);
 
   /**
@@ -952,7 +953,7 @@ public:
 
   Arguments* getForwardOutput();
 
-  Matrix* getLayerOutput(const std::string& layerName);
+  Arguments* getLayerOutput(const std::string& layerName);
 };
 
 /// the N-Best results generated from one input sequence.
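Note: after this patch, getLayerOutputs in py_paddle returns, for every requested layer, the dict built by __arguments_to_numpy__ rather than a bare numpy matrix, which is why the demo code above reads output[name]['value']. A minimal usage sketch (assumes an already-initialized swig_paddle.GradientMachine named m, and 'fc_layer' is a hypothetical layer name taken from the network config):

    # m is a swig_paddle.GradientMachine built from a config; run forward first
    outs = m.getLayerOutputs(['fc_layer'])
    fc = outs['fc_layer']['value']  # numpy array of the layer's output values
    print fc.mean(0)                # batch average, as in the demos above
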
diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp index d83dc380be..c742614aff 100644 --- a/paddle/api/Trainer.cpp +++ b/paddle/api/Trainer.cpp @@ -131,12 +131,10 @@ void Trainer::testOneDataBatch(size_t batchSize, const Arguments& args) { void TrainerPrivate::finishTestPeriod() { tester_->finishTestPeriod(); } void Trainer::finishTestPeriod() { m->finishTestPeriod(); } -Matrix* Trainer::getLayerOutput(const std::string& layerName) { - auto nn = std::dynamic_pointer_cast( - this->m->getGradientMachine()); +Arguments* Trainer::getLayerOutput(const std::string& layerName) { + auto nn = this->m->getGradientMachine(); CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork"; - auto m = nn->getLayerOutput(layerName); - return Matrix::createByPaddleMatrixPtr(&m); + return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName)); } void Trainer::forwardOneBatch(size_t batchSize) { diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h index 5469c0d89f..ae39783c6b 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.h +++ b/paddle/gserver/gradientmachines/GradientMachine.h @@ -134,8 +134,8 @@ public: backward(callback); } - virtual MatrixPtr getLayerOutput(const std::string& layerName) const { - return nullptr; + virtual const Argument& getLayerOutput(const std::string& layerName) { + return *((Argument*)nullptr); } // see comment in Layer.h for the function with the same name diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp index 2d42e64830..6b11b0155e 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp @@ -282,33 +282,17 @@ void MultiGradientMachine::forwardBackward(const std::vector& inArgs, backwardImp(callback); } -MatrixPtr MultiGradientMachine::getLayerOutput( - const std::string& layerName) const { - // each thread has the same neural network - auto nn = threads_[0]->getGradientMachine(); - size_t height = 0; - size_t width = nn->getLayerOutput(layerName)->getWidth(); - std::vector mats; - mats.reserve(threads_.size()); - for (auto& thread : threads_) { - MatrixPtr out = thread->getGradientMachine()->getLayerOutput(layerName); - mats.push_back(out); - height += out->getHeight(); - CHECK_EQ(width, out->getWidth()); - } +const Argument& MultiGradientMachine::getLayerOutput( + const std::string& layerName) { + std::vector args; + args.reserve(threads_.size()); - MatrixPtr layerOutput; - Matrix::resizeOrCreate(layerOutput, height, width, false, false); - - // copy one layer output from one trainer thread at each time - size_t startRow = 0; - for (auto& mat : mats) { - auto tmpMatrix = layerOutput->subMatrix(startRow, mat->getHeight()); - tmpMatrix->copyFrom(*mat); - startRow += mat->getHeight(); + for (auto& thread : threads_) { + args.push_back(thread->getGradientMachine()->getLayerOutput(layerName)); } + outLayerArgs_.concat(args, false /* use_gpu */, outArgStream_, passType_); - return layerOutput; + return outLayerArgs_; } void MultiGradientMachine::backwardImp(const UpdateCallback& callback) { diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h index a1a2d41706..9083230afd 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.h +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h @@ -189,7 +189,7 @@ public: PassType passType, const 
UpdateCallback& callback); - virtual MatrixPtr getLayerOutput(const std::string& layerName) const; + virtual const Argument& getLayerOutput(const std::string& layerName); virtual void onPassEnd(); @@ -316,6 +316,8 @@ protected: std::vector outArgs_; hl_stream_t outArgStream_; + Argument outLayerArgs_; + /// ParameterType which needs to be merged from each GPU std::vector mergeTypes_; int numDevices_; /* number of gpu devices */ diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index 00887c81d4..d1afde40e1 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -293,10 +293,8 @@ void NeuralNetwork::backward(const UpdateCallback& callback) { } } -MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) const { - auto it = layerMap_.find(layerName); - CHECK(it != layerMap_.end()) << "Cannot find layer: " << layerName; - return it->second->getOutputValue(); +const Argument& NeuralNetwork::getLayerOutput(const std::string& layerName) { + return getLayer(layerName)->getOutput(); } void NeuralNetwork::onPassEnd() { diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h index 6ecc251a40..b4dc38e31b 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.h +++ b/paddle/gserver/gradientmachines/NeuralNetwork.h @@ -87,7 +87,7 @@ public: virtual void backward(const UpdateCallback& callback = nullptr); - virtual MatrixPtr getLayerOutput(const std::string& layerName) const; + virtual const Argument& getLayerOutput(const std::string& layerName); const LayerPtr& getLayer(const std::string& layerName) const { auto it = layerMap_.find(layerName); diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/gserver/layers/CosSimLayer.cpp index 1501c74370..57ba124e40 100644 --- a/paddle/gserver/layers/CosSimLayer.cpp +++ b/paddle/gserver/layers/CosSimLayer.cpp @@ -68,7 +68,7 @@ void CosSimLayer::forward(PassType passType) { void CosSimLayer::backward(const UpdateCallback& callback) { /* activation */ { REGISTER_TIMER_INFO("CosBpAtvTimer", getName().c_str()); - CHECK_EQ(backward_.size(), 1) << "Only one backward function needed"; + CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed"; const auto outG = this->getOutputGrad(); const auto outV = this->getOutputValue(); diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py index ce105d249a..a708def1d2 100644 --- a/paddle/py_paddle/util.py +++ b/paddle/py_paddle/util.py @@ -208,7 +208,7 @@ def __monkeypatch_gradient_machine__(): output = dict() for name in layerNames: - output[name] = __matrix_to_numpy__(self.getLayerOutput(name)) + output[name] = __arguments_to_numpy__(0, self.getLayerOutput(name)) return output swig_paddle.GradientMachine.getLayerOutputs = getLayerOutputs From bbfcee20fd8733c2e9ea5ec49e1963d62e777730 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 22 Feb 2017 17:42:08 +0800 Subject: [PATCH 14/35] Add const for Trainer::getLayerOutput --- paddle/api/PaddleAPI.h | 2 +- paddle/api/Trainer.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h index 10569a7170..59798796a2 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/api/PaddleAPI.h @@ -953,7 +953,7 @@ public: Arguments* getForwardOutput(); - Arguments* getLayerOutput(const std::string& layerName); + Arguments* getLayerOutput(const std::string& layerName) const; }; /// the N-Best results generated from 
one input sequence. diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp index c742614aff..29cf2aa450 100644 --- a/paddle/api/Trainer.cpp +++ b/paddle/api/Trainer.cpp @@ -131,7 +131,7 @@ void Trainer::testOneDataBatch(size_t batchSize, const Arguments& args) { void TrainerPrivate::finishTestPeriod() { tester_->finishTestPeriod(); } void Trainer::finishTestPeriod() { m->finishTestPeriod(); } -Arguments* Trainer::getLayerOutput(const std::string& layerName) { +Arguments* Trainer::getLayerOutput(const std::string& layerName) const { auto nn = this->m->getGradientMachine(); CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork"; return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName)); From 51de2ded3ecf674bd5f96a9f3129d6630bfb65a1 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 23 Feb 2017 16:59:38 +0800 Subject: [PATCH 15/35] add optimizer in v2 --- python/paddle/v2/optimizer.py | 55 +++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index aa2942bc9f..10e255dc94 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -3,7 +3,10 @@ import paddle.trainer_config_helpers.optimizers as v1_optimizers import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils import paddle.v2 -__all__ = ['Adam', 'Adamax'] +__all__ = [ + 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', + 'RMSProp', 'ModelAverage', 'L2Regularization' +] class Optimizer(object): @@ -38,6 +41,14 @@ class Optimizer(object): pass_num) +class Momentum(Optimizer): + def __init__(self, momentum=None, sparse=False, **kwargs): + learning_method = v1_optimizers.MomentumOptimizer( + momentum=None, sparse=False) + super(Momentum, self).__init__( + learning_method=learning_method, **kwargs) + + class Adam(Optimizer): def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs): learning_method = v1_optimizers.AdamOptimizer( @@ -52,7 +63,45 @@ class Adamax(Optimizer): super(Adamax, self).__init__(learning_method=learning_method, **kwargs) +class AdaGrad(Optimizer): + def __init__(self, **kwargs): + learning_method = v1_optimizers.AdaGradOptimizer() + super(AdaGrad, self).__init__(learning_method=learning_method, **kwargs) + + +class DecayedAdaGrad(Optimizer): + def __init__(self, rho=0.95, epsilon=1e-06, **kwargs): + learning_method = v1_optimizers.DecayedAdaGradOptimizer( + rho=rho, epsilon=epsilon) + super(DecayedAdaGrad, self).__init__( + learning_method=learning_method, **kwargs) + + +class AdaDelta(Optimizer): + def __init__(self, rho=0.95, epsilon=1e-06, **kwargs): + learning_method = v1_optimizers.AdaDeltaOptimizer( + rho=rho, epsilon=epsilon) + super(AdaDelta, self).__init__( + learning_method=learning_method, **kwargs) + + +class RMSProp(Optimizer): + def __init__(self, rho=0.95, epsilon=1e-6, **kwargs): + learning_method = v1_optimizers.RMSPropOptimizer( + rho=rho, epsilon=epsilon) + super(RMSProp, self).__init__(learning_method=learning_method, **kwargs) + + +ModelAverage = v1_optimizers.ModelAverage +L2Regularization = v1_optimizers.L2Regularization + if __name__ == '__main__': swig_api.initPaddle('--use_gpu=false') - opt = paddle.v2.optimizer.Adam() - print opt.enable_types() + for opt in [ + Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(), + AdaDelta(), RMSProp(), Adam( + model_average=ModelAverage(average_window=0.5), + regularization=L2Regularization(rate=0.5), + gradient_clipping_threshold=25) + 
]:
+        print opt, opt.enable_types()

From 950b4a312697fe8b5132437d85b919353af15365 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Thu, 23 Feb 2017 17:24:01 +0800
Subject: [PATCH 16/35] change return type Argument

---
 paddle/api/GradientMachine.cpp                           | 3 ++-
 paddle/api/Trainer.cpp                                   | 3 ++-
 paddle/gserver/gradientmachines/GradientMachine.h        | 2 +-
 paddle/gserver/gradientmachines/MultiGradientMachine.cpp | 3 +--
 paddle/gserver/gradientmachines/MultiGradientMachine.h   | 2 +-
 paddle/gserver/gradientmachines/NeuralNetwork.cpp        | 2 +-
 paddle/gserver/gradientmachines/NeuralNetwork.h          | 2 +-
 paddle/gserver/layers/CosSimVecMatLayer.cpp              | 4 ++--
 8 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp
index a64e70a6bd..538ca2999f 100644
--- a/paddle/api/GradientMachine.cpp
+++ b/paddle/api/GradientMachine.cpp
@@ -148,7 +148,8 @@ Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const
     throw(UnsupportError) {
   auto nn = m->machine;
   if (nn) {
-    return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName));
+    auto arg = nn->getLayerOutput(layerName);
+    return Arguments::createByPaddleArgument(&arg);
   } else {
     throw UnsupportError();
   }
diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp
index 29cf2aa450..84e4ca054a 100644
--- a/paddle/api/Trainer.cpp
+++ b/paddle/api/Trainer.cpp
@@ -134,7 +134,8 @@ void Trainer::finishTestPeriod() { m->finishTestPeriod(); }
 Arguments* Trainer::getLayerOutput(const std::string& layerName) const {
   auto nn = this->m->getGradientMachine();
   CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork";
-  return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName));
+  auto arg = nn->getLayerOutput(layerName);
+  return Arguments::createByPaddleArgument(&arg);
 }
 
 void Trainer::forwardOneBatch(size_t batchSize) {
diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index ae39783c6b..bc2f2f8563 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -134,7 +134,7 @@ public:
     backward(callback);
   }
 
-  virtual const Argument& getLayerOutput(const std::string& layerName) {
-    return *((Argument*)nullptr);
+  virtual Argument getLayerOutput(const std::string& layerName) {
+    return Argument();
   }
 
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 6b11b0155e..123273f916 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -282,8 +282,7 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
   backwardImp(callback);
 }
 
-const Argument& MultiGradientMachine::getLayerOutput(
-    const std::string& layerName) {
+Argument MultiGradientMachine::getLayerOutput(const std::string& layerName) {
   std::vector<Argument> args;
   args.reserve(threads_.size());
 
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h
index 9083230afd..838a52b515 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.h
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h
@@ -189,7 +189,7 @@ public:
                                PassType passType,
                                const UpdateCallback& callback);
 
-  virtual const Argument& getLayerOutput(const std::string& layerName);
+  virtual Argument getLayerOutput(const std::string& layerName);
 
   virtual void onPassEnd();
 
diff --git
a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index d1afde40e1..2f2aa24aac 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -293,7 +293,7 @@ void NeuralNetwork::backward(const UpdateCallback& callback) {
   }
 }
 
-const Argument& NeuralNetwork::getLayerOutput(const std::string& layerName) {
+Argument NeuralNetwork::getLayerOutput(const std::string& layerName) {
   return getLayer(layerName)->getOutput();
 }
 
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h
index b4dc38e31b..e7b6c43840 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.h
@@ -87,7 +87,7 @@ public:
 
   virtual void backward(const UpdateCallback& callback = nullptr);
 
-  virtual const Argument& getLayerOutput(const std::string& layerName);
+  virtual Argument getLayerOutput(const std::string& layerName);
 
   const LayerPtr& getLayer(const std::string& layerName) const {
     auto it = layerMap_.find(layerName);
diff --git a/paddle/gserver/layers/CosSimVecMatLayer.cpp b/paddle/gserver/layers/CosSimVecMatLayer.cpp
index aabafd473a..0f887d8adf 100644
--- a/paddle/gserver/layers/CosSimVecMatLayer.cpp
+++ b/paddle/gserver/layers/CosSimVecMatLayer.cpp
@@ -112,7 +112,7 @@ bool CosSimVecMatLayer::init(const LayerMap& layerMap,
 void CosSimVecMatLayer::forward(PassType passType) {
   Layer::forward(passType);
 
-  CHECK_EQ(forward_.size(), 1) << "Only one forward function needed";
+  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";
 
   MatrixPtr inV0 = getInputValue(0);
   MatrixPtr inV1 = getInputValue(1);
@@ -145,7 +145,7 @@ void CosSimVecMatLayer::forward(PassType passType) {
 }
 
 void CosSimVecMatLayer::backward(const UpdateCallback& callback) {
-  CHECK_EQ(backward_.size(), 1) << "Only one forward function needed";
+  CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed";
 
   MatrixPtr inV0 = getInputValue(0);
   MatrixPtr inV1 = getInputValue(1);

From d1ab3c80744a851164dd4dc76a847193eb4c5562 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 23 Feb 2017 19:20:33 +0800
Subject: [PATCH 17/35] MNIST dataset reader implementation

---
 python/paddle/v2/data_set/__init__.py |  0
 python/paddle/v2/data_set/mnist.py    | 62 +++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 python/paddle/v2/data_set/__init__.py
 create mode 100644 python/paddle/v2/data_set/mnist.py

diff --git a/python/paddle/v2/data_set/__init__.py b/python/paddle/v2/data_set/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/paddle/v2/data_set/mnist.py b/python/paddle/v2/data_set/mnist.py
new file mode 100644
index 0000000000..34f61bb9f6
--- /dev/null
+++ b/python/paddle/v2/data_set/mnist.py
@@ -0,0 +1,62 @@
+import sklearn.datasets.mldata
+import sklearn.model_selection
+import numpy
+
+__all__ = ['MNISTReader', 'train_reader_creator', 'test_reader_creator']
+
+DATA_HOME = None
+
+
+def __mnist_reader__(data, target):
+    n_samples = data.shape[0]
+    for i in xrange(n_samples):
+        yield data[i].astype(numpy.float32), int(target[i])
+
+
+class MNISTReader(object):
+    """
+    mnist dataset reader. The `train_reader` and `test_reader` method returns
+    a iterator of each sample.
Each sample is combined by 784-dim float and a + one-dim label + """ + + def __init__(self, random_state): + data = sklearn.datasets.mldata.fetch_mldata( + "MNIST original", data_home=DATA_HOME) + n_train = 60000 + self.X_train, self.X_test, self.y_train, self.y_test = sklearn.model_selection.train_test_split( + data.data / 255.0, + data.target.astype("int"), + train_size=n_train, + random_state=random_state) + + def train_reader(self): + return __mnist_reader__(self.X_train, self.y_train) + + def test_reader(self): + return __mnist_reader__(self.X_test, self.y_test) + + +__default_instance__ = MNISTReader(0) + + +def train_reader_creator(): + """ + Default train set reader creator. + """ + return __default_instance__.train_reader + + +def test_reader_creator(): + """ + Default test set reader creator. + """ + return __default_instance__.test_reader + + +def unittest(): + assert len(list(train_reader_creator()())) == 60000 + + +if __name__ == '__main__': + unittest() From 84b423a89a2a6ece21910c83277f6282b80f6be7 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Thu, 23 Feb 2017 17:35:06 +0800 Subject: [PATCH 18/35] refine data feeder and add unit test --- python/CMakeLists.txt | 1 + python/paddle/v2/data_feeder.py | 110 ++++++++++----- python/paddle/v2/tests/CMakeLists.txt | 2 + python/paddle/v2/tests/run_tests.sh | 36 +++++ python/paddle/v2/tests/test_data_feeder.py | 150 +++++++++++++++++++++ 5 files changed, 264 insertions(+), 35 deletions(-) create mode 100644 python/paddle/v2/tests/CMakeLists.txt create mode 100755 python/paddle/v2/tests/run_tests.sh create mode 100644 python/paddle/v2/tests/test_data_feeder.py diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 357637e203..71af50a9a4 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -25,6 +25,7 @@ add_custom_target(paddle_python ALL DEPENDS add_subdirectory(paddle/trainer_config_helpers/tests) add_subdirectory(paddle/reader/tests) +add_subdirectory(paddle/v2/tests) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/ DESTINATION opt/paddle/share/wheels diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py index 83a4efef9e..b594643dda 100644 --- a/python/paddle/v2/data_feeder.py +++ b/python/paddle/v2/data_feeder.py @@ -12,49 +12,89 @@ # See the License for the specific language governing permissions and # limitations under the License. +from py_paddle import swig_paddle from py_paddle import DataProviderConverter +import data_type __all__ = ['DataFeeder'] -""" -DataFeeder converts the data returned by paddle.reader into a data structure -of Arguments which is defined in the API. The paddle.reader usually returns -a list of mini-batch data. Each item in the list is a tuple or list, which is -one sample with multiple features. DataFeeder converts this mini-batch data -into Arguments in order to feed it to C++ interface. - -The example usage: - - data_types = [paddle.data_type.dense_vector(784), - paddle.data_type.integer_value(10)] - feeder = DataFeeder(input_types=data_types) - minibatch_data = [ - ( [1.0,2.0,3.0,4.0], 5, [6,7,8] ), # first sample - ( [1.0,2.0,3.0,4.0], 5, [6,7,8] ) # second sample - ] - - # or - # minibatch_data = [ - # [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ], # first sample - # [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ] # second sample - # ] - arg = feeder(minibatch_data) - - -Args: - input_types: A list of input data types. It's length is equal to the length - of data returned by paddle.reader. Each item specifies the type - of each feature. 
- mintbatch_data: A list of mini-batch data. Each item is a list or tuple, + + +class DataFeeder(DataProviderConverter): + """ + DataFeeder converts the data returned by paddle.reader into a data structure + of Arguments which is defined in the API. The paddle.reader usually returns + a list of mini-batch data. Each item in the list is a list or a tuple, + which is one sample with one or multiple features. DataFeeder converts this + mini-batch data into Arguments in order to feed it to C++ interface. + + The example usage: + + data_types = [('image', paddle.data_type.dense_vector(784)), + ('label', paddle.data_type.integer_value(10))] + reader_dict = {'image':0, 'label':1} + feeder = DataFeeder(data_types=data_types, reader_dict=reader_dict) + minibatch_data = [ + ( [1.0,2.0,3.0,4.0], 5, [6,7,8] ), # first sample + ( [1.0,2.0,3.0,4.0], 5, [6,7,8] ) # second sample + ] + arg = feeder(minibatch_data) + """ + + def __init__(self, data_types, reader_dict): + """ + :param data_types: A list to specify data name and type. Each item is + a tuple of (data_name, data_type). For example: + [('image', paddle.data_type.dense_vector(784)), + ('label', paddle.data_type.integer_value(10))] + + :type data_types: A list of tuple + :param reader_dict: A dictionary to specify the position of each data + in the input data. + :type reader_dict: dict() + """ + self.input_names = [] + self.input_types = [] + self.reader_dict = reader_dict + for each in data_types: + self.input_names.append(each[0]) + self.input_types.append(each[1]) + assert isinstance(each[1], data_type.InputType) + DataProviderConverter.__init__(self, self.input_types) + + def convert(self, dat, argument=None): + """ + :param dat: A list of mini-batch data. Each item is a list or tuple, for example: [ (feature_0, feature_1, feature_2, ...), # first sample (feature_0, feature_1, feature_2, ...), # second sample ... ] + :type dat: List + :param argument: An Arguments object contains this mini-batch data with + one or multiple features. The Arguments definition is + in the API. + :type argument: swig_paddle.Arguments + """ + + if argument is None: + argument = swig_paddle.Arguments.createArguments(0) + assert isinstance(argument, swig_paddle.Arguments) + argument.resize(len(self.input_types)) + + scanners = [ + DataProviderConverter.create_scanner(i, each_type) + for i, each_type in enumerate(self.input_types) + ] + + for each_sample in dat: + for name, scanner in zip(self.input_names, scanners): + scanner.scan(each_sample[self.reader_dict[name]]) + + for scanner in scanners: + scanner.finish_scan(argument) -Returns: - An Arguments object contains this mini-batch data with multiple features. - The Arguments definition is in the API. -""" + return argument -DataFeeder = DataProviderConverter + def __call__(self, dat, argument=None): + return self.convert(dat, argument) diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt new file mode 100644 index 0000000000..5842a716ca --- /dev/null +++ b/python/paddle/v2/tests/CMakeLists.txt @@ -0,0 +1,2 @@ +add_test(NAME test_v2_api + COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE}) diff --git a/python/paddle/v2/tests/run_tests.sh b/python/paddle/v2/tests/run_tests.sh new file mode 100755 index 0000000000..b96f54fe9c --- /dev/null +++ b/python/paddle/v2/tests/run_tests.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +pushd `dirname $0` > /dev/null +SCRIPTPATH=$PWD +popd > /dev/null + +cd $SCRIPTPATH + +$1 -m pip install ../../../../paddle/dist/*.whl + +test_list="test_data_feeder.py" + +export PYTHONPATH=$PWD/../../../../python/ + +for fn in $test_list +do + echo "test $fn" + $1 $fn + if [ $? -ne 0 ]; then + exit 1 + fi +done diff --git a/python/paddle/v2/tests/test_data_feeder.py b/python/paddle/v2/tests/test_data_feeder.py new file mode 100644 index 0000000000..dcf433d7d8 --- /dev/null +++ b/python/paddle/v2/tests/test_data_feeder.py @@ -0,0 +1,150 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import py_paddle.swig_paddle as api +import numpy as np + +from paddle.v2 import data_type +from paddle.v2.data_feeder import DataFeeder + + +class DataFeederTest(unittest.TestCase): + def dense_reader(self, size): + data = np.random.random(size) + return data + + def sparse_binary_reader(self, high, size_limit, non_empty=False): + num = np.random.randint(size_limit) # num could be 0 + while non_empty and num == 0: + num = np.random.randint(size_limit) + return np.random.randint(high, size=num).tolist() + + def test_dense_vector(self): + def compare(input): + feeder = DataFeeder([('image', data_type.dense_vector(784))], + {'image': 0}) + arg = feeder([input]) + output = arg.getSlotValue(0).copyToNumpyMat() + input = np.array(input, dtype='float32') + self.assertAlmostEqual(input.all(), output.all()) + + # test numpy array + batch_size = 32 + dim = 784 + data = [] + for i in xrange(batch_size): + data.append(self.dense_reader(784)) + compare(data) + + # test list + data = [] + for i in xrange(batch_size): + data.append(self.dense_reader(784).tolist()) + compare(data) + + def test_sparse_binary(self): + dim = 10000 + batch_size = 32 + data = [] + for i in xrange(batch_size): + data.append([self.sparse_binary_reader(dim, 50)]) + feeder = DataFeeder([('input', data_type.sparse_binary_vector(dim))], + {'input': 0}) + arg = feeder(data) + output = arg.getSlotValue(0) + assert isinstance(output, api.Matrix) + for i in xrange(batch_size): + self.assertEqual(output.getSparseRowCols(i), data[i][0]) + + def test_sparse(self): + dim = 10000 + batch_size = 32 + v = [] + w = [] + data = [] + for dat in xrange(batch_size): + a = self.sparse_binary_reader(dim, 40, non_empty=True) + b = self.dense_reader(len(a)).tolist() + v.append(a) + w.append(b[0]) + data.append([zip(a, 
b)])
+
+        feeder = DataFeeder([('input', data_type.sparse_vector(dim))],
+                            {'input': 0})
+        arg = feeder(data)
+        output = arg.getSlotValue(0)
+        assert isinstance(output, api.Matrix)
+        for i in xrange(batch_size):
+            self.assertEqual(output.getSparseRowCols(i), v[i])
+
+    def test_integer(self):
+        dim = 100
+        batch_size = 32
+        index = []
+        for i in xrange(batch_size):
+            index.append([np.random.randint(dim)])
+        feeder = DataFeeder([('input', data_type.integer_value(dim))],
+                            {'input': 0})
+        arg = feeder(index)
+        output = arg.getSlotIds(0).copyToNumpyArray()
+        index = np.array(index, dtype='int')
+        self.assertEqual(output.all(), index.flatten().all())
+
+    def test_multiple_slots(self):
+        batch_size = 2
+        data = []
+        for i in xrange(batch_size):
+            each_sample = []
+            each_sample.append(np.random.randint(10))  # size of feature 2: 10
+            each_sample.append(
+                self.sparse_binary_reader(
+                    20000, 40, non_empty=True))  # size of feature 1: 20000
+            each_sample.append(self.dense_reader(100))  # size of feature 0: 100
+            data.append(each_sample)
+
+        # test multiple features
+        data_types = [('fea0', data_type.dense_vector(100)),
+                      ('fea1', data_type.sparse_binary_vector(20000)),
+                      ('fea2', data_type.integer_value(10))]
+        feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0})
+        arg = feeder(data)
+        output_dense = arg.getSlotValue(0).copyToNumpyMat()
+        output_sparse = arg.getSlotValue(1)
+        output_index = arg.getSlotIds(2).copyToNumpyArray()
+        for i in xrange(batch_size):
+            self.assertEqual(output_dense[i].all(), data[i][2].all())
+            self.assertEqual(output_sparse.getSparseRowCols(i), data[i][1])
+            self.assertEqual(output_index[i], data[i][0])
+
+        # reader returns 3 features, but only use 2 features
+        data_types = [('fea0', data_type.dense_vector(100)),
+                      ('fea2', data_type.integer_value(10))]
+        feeder = DataFeeder(data_types, {'fea0': 2, 'fea2': 0})
+        arg = feeder(data)
+        output_dense = arg.getSlotValue(0).copyToNumpyMat()
+        output_index = arg.getSlotIds(1).copyToNumpyArray()
+        for i in xrange(batch_size):
+            self.assertEqual(output_dense[i].all(), data[i][2].all())
+            self.assertEqual(output_index[i], data[i][0])
+
+
+if __name__ == '__main__':
+    api.initPaddle("--use_gpu=0")
+    unittest.main()

From f3c7fbeec4e256585dcf36e08fc2c06da243a045 Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Thu, 23 Feb 2017 22:05:59 +0800
Subject: [PATCH 19/35] make mnist run

---
 demo/mnist/api_train_v2.py                 |  9 +++++----
 python/paddle/v2/__init__.py               |  1 +
 python/paddle/v2/tests/test_data_feeder.py | 22 ++++++++++++++------
 python/paddle/v2/trainer.py                | 11 +++--------
 4 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
index 6fc01ce58b..650bf392bb 100644
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -50,11 +50,12 @@ def main():
         parameters=parameters,
         event_handler=event_handler,
         batch_size=32,  # batch size should be refactor in Data reader
-        data_types={  # data_types will be removed, It should be in
+        data_types=[  # data_types will be removed; it should be in
             # network topology
-            'pixel': images.type,
-            'label': label.type
-        })
+            ('pixel', images.type),
+            ('label', label.type)],
+        reader_dict={'pixel':0, 'label':1}
+        )
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index be752731ba..bf06b5a7e3 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -18,6 +18,7 @@ import
parameters import trainer import event import data_type +import data_feeder import py_paddle.swig_paddle as api __all__ = [ diff --git a/python/paddle/v2/tests/test_data_feeder.py b/python/paddle/v2/tests/test_data_feeder.py index dcf433d7d8..95a59a5d97 100644 --- a/python/paddle/v2/tests/test_data_feeder.py +++ b/python/paddle/v2/tests/test_data_feeder.py @@ -36,7 +36,7 @@ class DataFeederTest(unittest.TestCase): def compare(input): feeder = DataFeeder([('image', data_type.dense_vector(784))], {'image': 0}) - arg = feeder([input]) + arg = feeder(input) output = arg.getSlotValue(0).copyToNumpyMat() input = np.array(input, dtype='float32') self.assertAlmostEqual(input.all(), output.all()) @@ -46,13 +46,17 @@ class DataFeederTest(unittest.TestCase): dim = 784 data = [] for i in xrange(batch_size): - data.append(self.dense_reader(784)) + each_sample = [] + each_sample.append(self.dense_reader(dim)) + data.append(each_sample) compare(data) # test list data = [] for i in xrange(batch_size): - data.append(self.dense_reader(784).tolist()) + each_sample = [] + each_sample.append(self.dense_reader(dim).tolist()) + data.append(each_sample) compare(data) def test_sparse_binary(self): @@ -60,7 +64,9 @@ class DataFeederTest(unittest.TestCase): batch_size = 32 data = [] for i in xrange(batch_size): - data.append([self.sparse_binary_reader(dim, 50)]) + each_sample = [] + each_sample.append(self.sparse_binary_reader(dim, 50)) + data.append(each_sample) feeder = DataFeeder([('input', data_type.sparse_binary_vector(dim))], {'input': 0}) arg = feeder(data) @@ -76,11 +82,13 @@ class DataFeederTest(unittest.TestCase): w = [] data = [] for dat in xrange(batch_size): + each_sample = [] a = self.sparse_binary_reader(dim, 40, non_empty=True) b = self.dense_reader(len(a)).tolist() v.append(a) w.append(b[0]) - data.append([zip(a, b)]) + each_sample.append(zip(a, b)) + data.append(each_sample) feeder = DataFeeder([('input', data_type.sparse_vector(dim))], {'input': 0}) @@ -95,7 +103,9 @@ class DataFeederTest(unittest.TestCase): batch_size = 32 index = [] for i in xrange(batch_size): - index.append([np.random.randint(dim)]) + each_sample = [] + each_sample.append(np.random.randint(dim)) + index.append(each_sample) feeder = DataFeeder([('input', data_type.integer_value(dim))], {'input': 0}) arg = feeder(index) diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 5709c7e886..023ab5e42d 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -69,7 +69,8 @@ class SGD(ITrainer): test_data_reader=None, event_handler=None, batch_size=32, - data_types=None): + data_types=None, + reader_dict=None): """ Training method. Will train num_passes of input data. 
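A minimal sketch of the call this reworked signature expects, mirroring the demo/mnist/api_train_v2.py hunk earlier in this patch; train_reader, cost, parameters, and event_handler are assumed to be set up as in that demo:

    trainer.train(
        train_data_reader=train_reader,
        topology=cost,
        parameters=parameters,
        event_handler=event_handler,
        batch_size=32,
        data_types=[('pixel', images.type), ('label', label.type)],
        reader_dict={'pixel': 0, 'label': 1})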
@@ -103,13 +104,7 @@ class SGD(ITrainer): gm.start() out_args = api.Arguments.createArguments(0) - data_types_lists = [] - for each in topology.input_layer_names: - if each not in data_types: - raise ValueError() - data_types_lists.append(data_types[each]) - - feeder = DataFeeder(input_types=data_types_lists) + feeder = DataFeeder(data_types, reader_dict) for pass_id in xrange(num_passes): updater.startPass() From 38a792f20ed9e65d2920ded6ad42a5b68f2146ee Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 24 Feb 2017 13:52:31 +0800 Subject: [PATCH 20/35] Clean mnist code --- python/paddle/v2/data_set/config.py | 8 ++++ python/paddle/v2/data_set/mnist.py | 58 +++++++++++++---------------- 2 files changed, 33 insertions(+), 33 deletions(-) create mode 100644 python/paddle/v2/data_set/config.py diff --git a/python/paddle/v2/data_set/config.py b/python/paddle/v2/data_set/config.py new file mode 100644 index 0000000000..69e96d65ef --- /dev/null +++ b/python/paddle/v2/data_set/config.py @@ -0,0 +1,8 @@ +import os + +__all__ = ['DATA_HOME'] + +DATA_HOME = os.path.expanduser('~/.cache/paddle_data_set') + +if not os.path.exists(DATA_HOME): + os.makedirs(DATA_HOME) diff --git a/python/paddle/v2/data_set/mnist.py b/python/paddle/v2/data_set/mnist.py index 34f61bb9f6..6f35acf683 100644 --- a/python/paddle/v2/data_set/mnist.py +++ b/python/paddle/v2/data_set/mnist.py @@ -1,61 +1,53 @@ import sklearn.datasets.mldata import sklearn.model_selection import numpy +from config import DATA_HOME -__all__ = ['MNISTReader', 'train_reader_creator', 'test_reader_creator'] +__all__ = ['MNIST', 'train_creator', 'test_creator'] -DATA_HOME = None +def __mnist_reader_creator__(data, target): + def reader(): + n_samples = data.shape[0] + for i in xrange(n_samples): + yield (data[i] / 255.0).astype(numpy.float32), int(target[i]) -def __mnist_reader__(data, target): - n_samples = data.shape[0] - for i in xrange(n_samples): - yield data[i].astype(numpy.float32), int(target[i]) + return reader -class MNISTReader(object): +class MNIST(object): """ mnist dataset reader. The `train_reader` and `test_reader` method returns a iterator of each sample. Each sample is combined by 784-dim float and a one-dim label """ - def __init__(self, random_state): + def __init__(self, random_state=0, test_size=10000, **options): data = sklearn.datasets.mldata.fetch_mldata( "MNIST original", data_home=DATA_HOME) - n_train = 60000 self.X_train, self.X_test, self.y_train, self.y_test = sklearn.model_selection.train_test_split( - data.data / 255.0, - data.target.astype("int"), - train_size=n_train, - random_state=random_state) + data.data, + data.target, + test_size=test_size, + random_state=random_state, + **options) - def train_reader(self): - return __mnist_reader__(self.X_train, self.y_train) + def train_creator(self): + return __mnist_reader_creator__(self.X_train, self.y_train) - def test_reader(self): - return __mnist_reader__(self.X_test, self.y_test) + def test_creator(self): + return __mnist_reader_creator__(self.X_test, self.y_test) -__default_instance__ = MNISTReader(0) - - -def train_reader_creator(): - """ - Default train set reader creator. - """ - return __default_instance__.train_reader - - -def test_reader_creator(): - """ - Default test set reader creator. 
-    """
-    return __default_instance__.test_reader
+__default_instance__ = MNIST()
+train_creator = __default_instance__.train_creator
+test_creator = __default_instance__.test_creator
 
 
 def unittest():
-    assert len(list(train_reader_creator()())) == 60000
+    size = 12045
+    mnist = MNIST(test_size=size)
+    assert len(list(mnist.test_creator()())) == size
 
 
 if __name__ == '__main__':

From 5258bcf3eee7f1ca24af04759b0cf4f21e8b5f0a Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Fri, 24 Feb 2017 17:41:58 +0800
Subject: [PATCH 23/35] implement more layers in v2

---
 doc/api/trainer_config_helpers/layers.rst   |  36 ++-
 .../paddle/trainer_config_helpers/layers.py | 214 ++++++++--------
 python/paddle/v2/__init__.py                |   3 +-
 python/paddle/v2/layer.py                   | 235 ++++++++++++++----
 python/paddle/v2/pooling.py                 |  24 ++
 5 files changed, 353 insertions(+), 159 deletions(-)
 create mode 100644 python/paddle/v2/pooling.py

diff --git a/doc/api/trainer_config_helpers/layers.rst b/doc/api/trainer_config_helpers/layers.rst
index 2793d6afd9..bbea823de4 100644
--- a/doc/api/trainer_config_helpers/layers.rst
+++ b/doc/api/trainer_config_helpers/layers.rst
@@ -139,24 +139,12 @@ lstmemory
    :members: lstmemory
    :noindex:
 
-lstm_step_layer
----------------
-.. automodule:: paddle.trainer_config_helpers.layers
-   :members: lstm_step_layer
-   :noindex:
-
 grumemory
 ---------
 .. automodule:: paddle.trainer_config_helpers.layers
    :members: grumemory
    :noindex:
 
-gru_step_layer
----------------
-.. automodule:: paddle.trainer_config_helpers.layers
-   :members: gru_step_layer
-   :noindex:
-
 Recurrent Layer Group
 =====================
 
@@ -172,6 +160,18 @@ recurrent_group
    :members: recurrent_group
    :noindex:
 
+lstm_step_layer
+---------------
+.. automodule:: paddle.trainer_config_helpers.layers
+   :members: lstm_step_layer
+   :noindex:
+
+gru_step_layer
+---------------
+.. automodule:: paddle.trainer_config_helpers.layers
+   :members: gru_step_layer
+   :noindex:
+
 beam_search
 ------------
 .. automodule:: paddle.trainer_config_helpers.layers
    :members: beam_search
    :noindex:
@@ -308,6 +308,12 @@ repeat_layer
    :members: repeat_layer
    :noindex:
 
+rotate_layer
+------------
+.. automodule:: paddle.trainer_config_helpers.layers
+   :members: rotate_layer
+   :noindex:
+
 seq_reshape_layer
 -----------------
 .. automodule:: paddle.trainer_config_helpers.layers
    :members: seq_reshape_layer
    :noindex:
@@ -462,6 +468,12 @@ ctc_layer
    :members: ctc_layer
    :noindex:
 
+warp_ctc_layer
+--------------
+.. automodule:: paddle.trainer_config_helpers.layers
+   :members: warp_ctc_layer
+   :noindex:
+
 nce_layer
 -----------
 ..
automodule:: paddle.trainer_config_helpers.layers diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 00aef80691..de903f8c74 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -30,88 +30,28 @@ except ImportError: import copy __all__ = [ - "full_matrix_projection", - "AggregateLevel", - "ExpandLevel", - "identity_projection", - "dotmul_projection", - "dotmul_operator", - "repeat_layer", - "seq_reshape_layer", - "table_projection", - "mixed_layer", - "data_layer", - "embedding_layer", - "fc_layer", - "grumemory", - "pooling_layer", - "lstmemory", - "last_seq", - "first_seq", - "cos_sim", - "hsigmoid", - "conv_projection", - "regression_cost", - 'classification_cost', - "LayerOutput", - 'img_conv_layer', - 'img_pool_layer', - 'batch_norm_layer', - 'img_cmrnorm_layer', - 'addto_layer', - 'concat_layer', - 'seq_concat_layer', - 'lstm_step_layer', - 'recurrent_group', - 'memory', - 'StaticInput', - 'expand_layer', - 'scaling_layer', - 'scaling_projection', - 'power_layer', - 'interpolation_layer', - 'bilinear_interp_layer', - 'trans_layer', - 'rotate_layer', - 'sum_to_one_norm_layer', - 'get_output_layer', - 'LayerType', - 'context_projection', - 'beam_search', - 'maxid_layer', - 'GeneratedInput', - 'SubsequenceInput', - 'gru_step_layer', - 'recurrent_layer', - 'BaseGeneratedInput', - 'conv_operator', - 'conv_shift_layer', - 'tensor_layer', - 'selective_fc_layer', - 'sampling_id_layer', - 'slope_intercept_layer', - 'trans_full_matrix_projection', - 'linear_comb_layer', - 'convex_comb_layer', - 'ctc_layer', - 'warp_ctc_layer', - 'crf_layer', - 'crf_decoding_layer', - 'nce_layer', - 'cross_entropy_with_selfnorm', - 'cross_entropy', - 'multi_binary_label_cross_entropy', - 'sum_cost', - 'rank_cost', - 'lambda_cost', - 'huber_cost', - 'block_expand_layer', - 'maxout_layer', - 'out_prod_layer', - 'print_layer', - 'priorbox_layer', - 'spp_layer', - 'pad_layer', + "full_matrix_projection", "AggregateLevel", "ExpandLevel", + "identity_projection", "dotmul_projection", "dotmul_operator", + "repeat_layer", "seq_reshape_layer", "table_projection", "mixed_layer", + "data_layer", "embedding_layer", "fc_layer", "grumemory", "pooling_layer", + "lstmemory", "last_seq", "first_seq", "cos_sim", "hsigmoid", + "conv_projection", "regression_cost", 'classification_cost', "LayerOutput", + 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer', 'img_cmrnorm_layer', + 'addto_layer', 'concat_layer', 'seq_concat_layer', 'lstm_step_layer', + 'recurrent_group', 'memory', 'StaticInput', 'expand_layer', 'scaling_layer', + 'scaling_projection', 'power_layer', 'interpolation_layer', + 'bilinear_interp_layer', 'trans_layer', 'rotate_layer', + 'sum_to_one_norm_layer', 'get_output_layer', 'LayerType', + 'context_projection', 'beam_search', 'maxid_layer', 'GeneratedInput', + 'SubsequenceInput', 'gru_step_layer', 'recurrent_layer', + 'BaseGeneratedInput', 'conv_operator', 'conv_shift_layer', 'tensor_layer', + 'selective_fc_layer', 'sampling_id_layer', 'slope_intercept_layer', + 'trans_full_matrix_projection', 'linear_comb_layer', 'convex_comb_layer', + 'ctc_layer', 'warp_ctc_layer', 'crf_layer', 'crf_decoding_layer', + 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', + 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', 'lambda_cost', + 'huber_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer', + 'print_layer', 'priorbox_layer', 'spp_layer', 'pad_layer', 'eos_layer' ] @@ 
-1287,6 +1227,12 @@ def last_seq(input, """ Get Last Timestamp Activation of a sequence. + The simple usage is: + + .. code-block:: python + + seq = last_seq(input=layer) + :param agg_level: Aggregated level :param name: Layer name. :type name: basestring @@ -1325,6 +1271,12 @@ def first_seq(input, """ Get First Timestamp Activation of a sequence. + The simple usage is: + + .. code-block:: python + + seq = first_seq(input=layer) + :param agg_level: aggregation level :param name: Layer name. :type name: basestring @@ -1425,7 +1377,7 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None): .. code-block:: python - expand = repeat_layer(layer, 4) + expand = repeat_layer(input=layer, num_repeats=4) :param input: Input layer :type input: LayerOutput @@ -1797,6 +1749,12 @@ def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None): Note that the above computation is for one sample. Multiple samples are processed in one batch. + The example usage is: + + .. code-block:: python + + cos = cos_sim(a=layer1, b=layer2, size=3) + :param name: layer name :type name: basestring :param a: input layer a @@ -1958,6 +1916,16 @@ def img_conv_layer(input, pieces. First 256/4 = 64 channels will process by first 32 filters. The rest channels will be processed by rest group of filters. + The example usage is: + + .. code-block:: python + + conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1, + num_channels=8, + num_filters=16, stride=1, + bias_attr=False, + act=ReluActivation()) + :param name: Layer name. :type name: basestring :param input: Layer Input. @@ -2097,6 +2065,34 @@ def img_pool_layer(input, .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ + - ceil_mode=True: + + .. math:: + + w = 1 + int(ceil(input\_width + 2 * padding - pool\_size) / float(stride)) + h = 1 + int(ceil(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) + + - ceil_mode=False: + + .. math:: + + w = 1 + int(floor(input\_width + 2 * padding - pool\_size) / float(stride)) + h = 1 + int(floor(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) + + The example usage is: + + .. code-block:: python + + maxpool = img_pool_layer(input=conv, + pool_size=3, + pool_size_y=5, + num_channels=8, + stride=1, + stride_y=2, + padding=1, + padding_y=2, + pool_type=MaxPooling()) + :param padding: pooling padding width. :type padding: int :param padding_y: pooling padding height. It's equal to padding by default. @@ -2123,19 +2119,6 @@ def img_pool_layer(input, :param ceil_mode: Wether to use ceil mode to calculate output height and with. Defalut is True. If set false, Otherwise use floor. - - ceil_mode=True: - - .. math:: - - w = 1 + int(ceil(input_width + 2 * padding - pool_size) / float(stride)) - h = 1 + int(ceil(input_height + 2 * padding_y - pool_size_y) / float(stride_y)) - - - ceil_mode=False: - - .. math:: - - w = 1 + int(floor(input_width + 2 * padding - pool_size) / float(stride)) - h = 1 + int(floor(input_height + 2 * padding_y - pool_size_y) / float(stride_y)) :type ceil_mode: bool :return: LayerOutput object. :rtype: LayerOutput @@ -2197,6 +2180,15 @@ def spp_layer(input, The details please refer to `Kaiming He's paper `_. + The example usage is: + + .. code-block:: python + + spp = spp_layer(input=data, + pyramid_height=2, + num_channels=16, + pool_type=MaxPooling()) + :param name: layer name. :type name: basestring :param input: layer's input. @@ -2285,6 +2277,12 @@ def img_cmrnorm_layer(input, The details please refer to `Alex's paper `_. 
+ The example usage is: + + .. code-block:: python + + norm = img_cmrnorm_layer(input=net, size=5) + :param name: layer name. :type name: None|basestring :param input: layer's input. @@ -2340,6 +2338,12 @@ def batch_norm_layer(input, The details of batch normalization please refer to this `paper `_. + The example usage is: + + .. code-block:: python + + norm = batch_norm_layer(input=net, act=ReluActivation()) + :param name: layer name. :type name: basestring :param input: batch normalization input. Better be linear activation. @@ -3903,13 +3907,13 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): .. code-block:: python - conv_shift = conv_shift_layer(input=[layer1, layer2]) + conv_shift = conv_shift_layer(a=layer1, b=layer2) :param name: layer name :type name: basestring :param a: Input layer a. :type a: LayerOutput - :param b: input layer b + :param b: input layer b. :type b: LayerOutput :param layer_attr: layer's extra attribute. :type layer_attr: ExtraLayerAttribute @@ -4001,8 +4005,8 @@ def tensor_layer(a, @wrap_act_default() @layer_support() def selective_fc_layer(input, - select, size, + select=None, act=None, name=None, pass_generation=False, @@ -4029,6 +4033,7 @@ def selective_fc_layer(input, :type input: LayerOutput|list|tuple :param select: The select layer. The output of select layer should be a sparse binary matrix, and treat as the mask of selective fc. + If is None, acts exactly like fc_layer. :type select: LayerOutput :param size: The layer dimension. :type size: int @@ -4257,7 +4262,7 @@ def block_expand_layer(input, .. code-block:: python - block_expand = block_expand_layer(input, + block_expand = block_expand_layer(input=layer, num_channels=128, stride_x=1, stride_y=1, @@ -4461,7 +4466,7 @@ def warp_ctc_layer(input, - You can set 'blank' to any value ranged in [0, num_classes], which should be consistent as that used in your labels. - As a native 'softmax' activation is interated to the warp-ctc library, - 'linear' activation is expected instead in the 'input' layer. + 'linear' activation is expected instead in the 'input' layer. The simple usage: @@ -4594,6 +4599,13 @@ def crf_decoding_layer(input, this layer will also calculate error. output.value[i] is 1 for incorrect decoding or 0 for correct decoding. + The simple usage: + + .. code-block:: python + + crf_decoding = crf_decoding_layer(input=input, + size=label_dim) + :param input: The first input layer. :type input: LayerOutput :param size: size of this layer. 
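A quick numeric check of the ceil_mode output-size formulas documented for img_pool_layer earlier in this patch, with the grouping made explicit and illustrative values input_width = 7, padding = 0, pool_size = 2, stride = 2:

    ceil_mode=True:  w = 1 + int(ceil((7 + 2 * 0 - 2) / 2.0)) = 1 + 3 = 4
    ceil_mode=False: w = 1 + int(floor((7 + 2 * 0 - 2) / 2.0)) = 1 + 2 = 3

The two modes differ exactly when stride does not evenly divide input_width + 2 * padding - pool_size.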
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 0cf7b8e903..ab352e880e 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -19,11 +19,12 @@ import trainer import event import data_type import attr +import pooling import py_paddle.swig_paddle as api __all__ = [ 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', - 'event', 'data_type', 'attr' + 'event', 'data_type', 'attr', 'pooling' ] diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 3920d4a08f..f4a85e9d03 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -76,12 +76,20 @@ from paddle.trainer_config_helpers.default_decorators import wrap_name_default import data_type import activation import attr +import pooling __all__ = [ - 'parse_network', 'data', 'fc', 'max_id', 'classification_cost', - 'cross_entropy_cost', 'cross_entropy_with_selfnorm_cost', 'regression_cost', + 'parse_network', 'data', 'fc', 'conv_shift', 'img_conv', 'img_pool', 'spp', + 'maxout', 'img_cmrnorm', 'batch_norm', 'sum_to_one_norm', 'recurrent', + 'lstmemory', 'grumemory', 'pool', 'last_seq', 'first_seq', 'concat', + 'seq_concat', 'block_expand', 'expand', 'repeat', 'seq_reshape', 'addto', + 'linear_comb', 'interpolation', 'bilinear_interp', 'power', 'scaling', + 'slope_intercept', 'tensor', 'cos_sim', 'trans', 'max_id', 'sampling_id', + 'pad', 'classification_cost', 'cross_entropy_cost', + 'cross_entropy_with_selfnorm_cost', 'regression_cost', 'multi_binary_label_cross_entropy_cost', 'rank_cost', 'lambda_cost', - 'sum_cost', 'huber_cost' + 'sum_cost', 'huber_cost', 'crf', 'crf_decoding', 'ctc', 'warp_ctc', 'nce', + 'hsigmoid', 'eos' ] @@ -130,11 +138,8 @@ class Layer(object): raise NotImplementedError() -def __convert_to_v2__(method_name, name_prefix, parent_names): - if name_prefix is not None: - wrapper = wrap_name_default(name_prefix=name_prefix) - else: - wrapper = None +def __convert_to_v2__(method_name, parent_names): + wrapper = wrap_name_default(name_prefix=method_name) class V2LayerImpl(Layer): def __init__(self, name=None, **kwargs): @@ -192,44 +197,92 @@ class DataLayerV2(Layer): data = DataLayerV2 -fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input']) -max_id = __convert_to_v2__( - 'maxid_layer', name_prefix='maxid', parent_names=['input']) -classification_cost = __convert_to_v2__( - 'classification_cost', - name_prefix='classification_cost', - parent_names=['input', 'label', 'weight']) -regression_cost = __convert_to_v2__( - 'regression_cost', - name_prefix='regression_cost', - parent_names=['input', 'label', 'weight']) -cross_entropy_cost = __convert_to_v2__( - 'cross_entropy', - name_prefix='cross_entropy', - parent_names=['input', 'label']) -cross_entropy_with_selfnorm_cost = __convert_to_v2__( - 'cross_entropy_with_selfnorm', - name_prefix='cross_entropy_with_selfnorm', - parent_names=['input', 'label']) -multi_binary_label_cross_entropy_cost = __convert_to_v2__( - 'multi_binary_label_cross_entropy', - name_prefix='multi_binary_label_cross_entropy', - parent_names=['input', 'label']) -rank_cost = __convert_to_v2__( - 'rank_cost', - name_prefix='rank_cost', - parent_names=['left', 'right', 'label', 'weight']) -lambda_cost = __convert_to_v2__( - 'lambda_cost', name_prefix='lambda_cost', parent_names=['input', 'score']) -sum_cost = __convert_to_v2__( - 'sum_cost', name_prefix='sum_cost', parent_names=['input']) -huber_cost = __convert_to_v2__( - 'huber_cost', name_prefix='huber_cost', parent_names=['input', 
'label']) +AggregateLevel = conf_helps.layers.AggregateLevel +ExpandLevel = conf_helps.layers.ExpandLevel + +layer_list = [ + # [V2LayerImpl, V1_method_name, parent_names] + # fully connected layers + ['fc', 'fc_layer', ['input']], + # conv layers + ['conv_shift', 'conv_shift_layer', ['a', 'b']], + ['img_conv', 'img_conv_layer', ['input']], + # image pooling layers + ['img_pool', 'img_pool_layer', ['input']], + ['spp', 'spp_layer', ['input']], + ['maxout', 'maxout_layer', ['input']], + # norm layers + ['img_cmrnorm', 'img_cmrnorm_layer', ['input']], + ['batch_norm', 'batch_norm_layer', ['input']], + ['sum_to_one_norm', 'sum_to_one_norm_layer', ['input']], + # recurrent layers + ['recurrent', 'recurrent_layer', ['input']], + ['lstmemory', 'lstmemory', ['input']], + ['grumemory', 'grumemory', ['input']], + # aggregate layers + ['pool', 'pooling_layer', ['input']], + ['last_seq', 'last_seq', ['input']], + ['first_seq', 'first_seq', ['input']], + ['concat', 'concat_layer', ['input']], + ['seq_concat', 'seq_concat_layer', ['a', 'b']], + # reshaping layers + ['block_expand', 'block_expand_layer', ['input']], + ['expand', 'expand_layer', ['input', 'expand_as']], + ['repeat', 'repeat_layer', ['input']], + ['rotate', 'rotate_layer', ['input']], + ['seq_reshape', 'seq_reshape_layer', ['input']], + # math layers + ['addto', 'addto_layer', ['input']], + ['linear_comb', 'linear_comb_layer', ['weights', 'vectors']], + ['interpolation', 'interpolation_layer', ['input', 'weight']], + ['bilinear_interp', 'bilinear_interp_layer', ['input']], + ['power', 'power_layer', ['input', 'weight']], + ['scaling', 'scaling_layer', ['input', 'weight']], + ['slope_intercept', 'slope_intercept_layer', ['input']], + ['tensor', 'tensor_layer', ['a', 'b']], + ['cos_sim', 'cos_sim', ['a', 'b']], + ['trans', 'trans_layer', ['input']], + # sampling layers + ['max_id', 'maxid_layer', ['input']], + ['sampling_id', 'sampling_id_layer', ['input']], + # slicing and joining layers + ['pad', 'pad_layer', ['input']], + # cost layers + [ + 'classification_cost', 'classification_cost', + ['input', 'label', 'weight'] + ], + ['regression_cost', 'regression_cost', ['input', 'label', 'weight']], + ['cross_entropy_cost', 'cross_entropy', ['input', 'label']], + [ + 'cross_entropy_with_selfnorm_cost', 'cross_entropy_with_selfnorm', + ['input', 'label'] + ], + [ + 'multi_binary_label_cross_entropy_cost', + 'multi_binary_label_cross_entropy', ['input', 'label'] + ], + ['rank_cost', 'rank_cost', ['left', 'right', 'label', 'weight']], + ['lambda_cost', 'lambda_cost', ['input', 'score']], + ['sum_cost', 'sum_cost', ['input']], + ['huber_cost', 'huber_cost', ['input', 'label']], + ['crf', 'crf_layer', ['input', 'label']], + ['crf_decoding', 'crf_decoding_layer', ['input']], + ['ctc', 'ctc_layer', ['input', 'label']], + ['warp_ctc', 'warp_ctc_layer', ['input', 'label']], + ['nce', 'nce_layer', ['input', 'label']], + ['hsigmoid', 'hsigmoid', ['input', 'label']], + # check layers + ['eos', 'eos_layer', ['input']] +] +for l in layer_list: + globals()[l[0]] = __convert_to_v2__(l[1], l[2]) if __name__ == '__main__': - pixel = data(name='pixel', type=data_type.dense_vector(784)) + pixel = data(name='pixel', type=data_type.dense_vector(128)) label = data(name='label', type=data_type.integer_value(10)) weight = data(name='weight', type=data_type.dense_vector(10)) + word = data(name='word', type=data_type.integer_value(12)) score = data(name='score', type=data_type.dense_vector(1)) hidden = fc(input=pixel, @@ -237,7 +290,90 @@ if __name__ == '__main__': 
act=activation.Sigmoid(), param_attr=attr.Param(name='hidden')) inference = fc(input=hidden, size=10, act=activation.Softmax()) + print parse_network(inference) + + # test conv layers + conv1 = conv_shift(a=pixel, b=score) + conv2 = img_conv( + input=pixel, + filter_size=1, + filter_size_y=1, + num_channels=8, + num_filters=16, + act=activation.Linear()) + print parse_network(conv1, conv2) + + # test image pooling layers + maxpool = img_pool( + input=conv2, + pool_size=2, + num_channels=16, + padding=1, + pool_type=pooling.Max()) + spp = spp(input=conv2, + pyramid_height=2, + num_channels=16, + pool_type=pooling.Max()) + maxout = maxout(input=conv2, num_channels=16, groups=4) + print parse_network(maxpool, spp, maxout) + + # test norm layers + norm1 = img_cmrnorm(input=maxpool, size=5) + norm2 = batch_norm(input=maxpool) + norm3 = sum_to_one_norm(input=maxpool) + print parse_network(norm1, norm2, norm3) + + # test recurrent layers + recurrent = recurrent(input=word) + lstm = lstmemory(input=word) + gru = grumemory(input=word) + print parse_network(recurrent, lstm, gru) + + # test aggregate layers + pool = pool( + input=pixel, + pooling_type=pooling.Avg(), + agg_level=AggregateLevel.EACH_SEQUENCE) + last_seq = last_seq(input=pixel) + first_seq = first_seq(input=pixel) + concat = concat(input=[last_seq, first_seq]) + seq_concat = seq_concat(a=last_seq, b=first_seq) + print parse_network(pool, last_seq, first_seq, concat, seq_concat) + + # test reshaping layers + block_expand = block_expand( + input=maxout, num_channels=4, stride_x=1, block_x=1) + expand = expand( + input=last_seq, expand_as=pixel, expand_level=ExpandLevel.FROM_TIMESTEP) + repeat = repeat(input=last_seq, num_repeats=4) + reshape = seq_reshape(input=last_seq, reshape_size=4) + rotate = rotate(input=pixel, height=16, width=49) + print parse_network(block_expand, expand, repeat, reshape, rotate) + + # test math layers + addto = addto(input=[last_seq, first_seq]) + linear_comb = linear_comb(weights=weight, vectors=hidden, size=10) + interpolation = interpolation(input=[hidden, hidden], weight=score) + bilinear = bilinear_interp(input=conv2, out_size_x=4, out_size_y=4) + power = power(input=conv1, weight=score) + scaling = scaling(input=conv1, weight=score) + slope = slope_intercept(input=conv1) + tensor = tensor(a=last_seq, b=first_seq, size=1000) + cos_sim = cos_sim(a=last_seq, b=first_seq) + trans = trans(input=tensor) + print parse_network(addto, linear_comb, interpolation, bilinear, power, + scaling, slope, tensor, cos_sim, trans) + + # test sampling layers maxid = max_id(input=inference) + sampling_id = sampling_id(input=inference) + print parse_network(maxid, sampling_id) + + # test slicing and joining layers + pad = pad(input=maxpool, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1]) + print parse_network(pad) + + # test cost layers cost1 = classification_cost(input=inference, label=label) cost2 = classification_cost(input=inference, label=label, weight=weight) cost3 = cross_entropy_cost(input=inference, label=label) @@ -249,9 +385,18 @@ if __name__ == '__main__': cost9 = lambda_cost(input=inference, score=score) cost10 = sum_cost(input=inference) cost11 = huber_cost(input=score, label=label) - - print parse_network(cost1, cost2) print parse_network(cost3, cost4) print parse_network(cost5, cost6) print parse_network(cost7, cost8, cost9, cost10, cost11) - print parse_network(inference, maxid) + + crf = crf(input=inference, label=label) + crf_decoding = crf_decoding(input=inference, size=3) + ctc = ctc(input=inference, label=label) 
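+    # Illustrative sketch (hypothetical sizes, not from the original demo):
+    # per the warp_ctc_layer docstring above, warp-ctc integrates its own
+    # softmax, so the input is expected to carry a linear activation, with
+    # num_classes + 1 slots to leave room for the blank label, e.g.:
+    #   ctc_in = fc(input=hidden, size=11, act=activation.Linear())
+    #   warp = warp_ctc(input=ctc_in, label=label, size=11)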
+ warp_ctc = warp_ctc(input=pixel, label=label) + nce = nce(input=inference, label=label, num_classes=3) + hsigmoid = hsigmoid(input=inference, label=label, num_classes=3) + print parse_network(crf, crf_decoding, ctc, warp_ctc, nce, hsigmoid) + + # test check layers + eos = eos(input=maxid, eos_id=5) + print parse_network(eos) diff --git a/python/paddle/v2/pooling.py b/python/paddle/v2/pooling.py new file mode 100644 index 0000000000..9076a159bb --- /dev/null +++ b/python/paddle/v2/pooling.py @@ -0,0 +1,24 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer_config_helpers.poolings import * + +__all__ = ["Max", "CudnnMax", "Avg", "CudnnAvg", "Sum", "SquareRootN"] + +Max = MaxPooling +CudnnMax = CudnnMaxPooling +Avg = AvgPooling +CudnnAvg = CudnnAvgPooling +Sum = SumPooling +SquareRootN = SquareRootNPooling From 623d24ad5c201996c66fc2ec48c43fd6a7f75973 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Fri, 24 Feb 2017 19:19:32 +0800 Subject: [PATCH 24/35] convert mixed layer, projection and operator --- .../paddle/trainer_config_helpers/layers.py | 2 + python/paddle/v2/data_type.py | 4 +- python/paddle/v2/layer.py | 248 +++++++++++++++++- 3 files changed, 239 insertions(+), 15 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 00aef80691..95f0915972 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -112,6 +112,7 @@ __all__ = [ 'priorbox_layer', 'spp_layer', 'pad_layer', + 'layer_support', ] @@ -708,6 +709,7 @@ class MixedLayerType(LayerOutput): # update the size which might be computed inside MixedLayer # according to the operator's output size self.size = ml.config.size + self.finalized = True @wrap_name_default("mixed") diff --git a/python/paddle/v2/data_type.py b/python/paddle/v2/data_type.py index 5b01ba4cd4..dd3ebfcb42 100644 --- a/python/paddle/v2/data_type.py +++ b/python/paddle/v2/data_type.py @@ -14,9 +14,9 @@ from paddle.trainer.PyDataProvider2 import \ InputType, dense_vector, sparse_binary_vector,\ - sparse_vector, integer_value + sparse_vector, integer_value, integer_value_sequence __all__ = [ 'InputType', 'dense_vector', 'sparse_binary_vector', 'sparse_vector', - 'integer_value' + 'integer_value', 'integer_value_sequence' ] diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 3920d4a08f..0e9e2a249d 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -72,16 +72,38 @@ import paddle.trainer_config_helpers as conf_helps from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ from paddle.trainer_config_helpers.default_decorators import wrap_name_default +from paddle.trainer_config_helpers.default_decorators import wrap_act_default +from paddle.trainer_config_helpers.default_decorators import wrap_bias_attr_default +from paddle.trainer_config_helpers.layers import layer_support 
 import data_type
 import activation
 import attr
+#import pudb;pudb.set_trace()
+
 __all__ = [
-    'parse_network', 'data', 'fc', 'max_id', 'classification_cost',
-    'cross_entropy_cost', 'cross_entropy_with_selfnorm_cost', 'regression_cost',
-    'multi_binary_label_cross_entropy_cost', 'rank_cost', 'lambda_cost',
-    'sum_cost', 'huber_cost'
+    'parse_network',
+    'data',
+    'fc',
+    'max_id',
+    'classification_cost',
+    'cross_entropy_cost',
+    'cross_entropy_with_selfnorm_cost',
+    'regression_cost',
+    'multi_binary_label_cross_entropy_cost',
+    'rank_cost',
+    'lambda_cost',
+    'sum_cost',
+    'huber_cost'
+    'full_matrix_projection',
+    'trans_full_matrix_projection',
+    'table_projection',
+    'identity_projection',
+    'scaling_projection',
+    'dotmul_projection',
+    'context_projection',
+    'conv_projection',
 ]
@@ -101,9 +123,8 @@ def parse_network(*outputs):
 
 class Layer(object):
-    def __init__(self, name, parent_layers):
+    def __init__(self, name=None, parent_layers=None):
         assert isinstance(parent_layers, dict)
-        assert isinstance(name, basestring)
         self.name = name
         self.__parent_layers__ = parent_layers
@@ -122,6 +143,9 @@ class Layer(object):
                 self.__parent_layers__[layer_name])
             kwargs[layer_name] = v1_layer
 
+        if self.name is None:
+            return self.to_proto_impl(**kwargs)
+
         if self.name not in context:
             context[self.name] = self.to_proto_impl(**kwargs)
         return context[self.name]
@@ -130,7 +154,7 @@ class Layer(object):
         raise NotImplementedError()
 
 
-def __convert_to_v2__(method_name, name_prefix, parent_names):
+def __convert_to_v2__(method_name, name_prefix=None, parent_names=None):
     if name_prefix is not None:
         wrapper = wrap_name_default(name_prefix=name_prefix)
     else:
@@ -160,7 +184,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names):
             args[each] = kwargs[each]
         for each in self.__other_kwargs__:
             args[each] = self.__other_kwargs__[each]
-        return getattr(conf_helps, method_name)(name=self.name, **args)
+        return getattr(conf_helps, method_name)(**args)
 
     return V2LayerImpl
@@ -191,6 +215,81 @@ class DataLayerV2(Layer):
         return getattr(conf_helps, self.__method_name__)(name=self.name, **args)
 
 
+class MixedLayerV2(Layer):
+    """
+    This class is used to support the `with` statement. Without it, the
+    following code would be enough to convert mixed_layer:
+ + mixed = __convert_to_v2__( + 'mixed_layer', name_prefix='mixed', parent_names=['input']) + """ + + class AddToSealedMixedLayerExceptionV2(Exception): + def __init__(self): + Exception.__init__(self) + + def __init__(self, + size=0, + input=None, + name=None, + act=None, + bias_attr=None, + layer_attr=None): + self.__method_name__ = 'mixed_layer' + self.finalized = False + + self.__parent_layers__ = dict() + other_kwargs = dict() + self.input_name = 'input' + self.__parent_layers__[self.input_name] = [] + if input is not None: + self.__parent_layers__[self.input_name] = input + + self.name = name + other_kwargs['size'] = size + other_kwargs['act'] = act + other_kwargs['bias_attr'] = bias_attr + other_kwargs['layer_attr'] = layer_attr + + Layer.__init__(self, name, self.__parent_layers__) + self.__other_kwargs__ = other_kwargs + + def __iadd__(self, other): + if not self.finalized: + self.__parent_layers__[self.input_name].append(other) + return self + else: + raise MixedLayerTypeV2.AddToSealedMixedLayerExceptionV2() + + def __enter__(self): + assert len(self.__parent_layers__[self.input_name]) == 0 + return self + + def __exit__(self, *args, **kwargs): + self.finalized = True + + def to_proto_impl(self, **kwargs): + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__other_kwargs__: + args[each] = self.__other_kwargs__[each] + return getattr(conf_helps, self.__method_name__)(name=self.name, **args) + + +@wrap_name_default("mixed") +@wrap_act_default(act=conf_helps.LinearActivation()) +@wrap_bias_attr_default(has_bias=False) +@layer_support(conf_helps.layers.ERROR_CLIPPING, conf_helps.layers.DROPOUT) +def mixed(size=0, + name=None, + input=None, + act=None, + bias_attr=False, + layer_attr=None): + return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) + + data = DataLayerV2 fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input']) max_id = __convert_to_v2__( @@ -226,12 +325,124 @@ sum_cost = __convert_to_v2__( huber_cost = __convert_to_v2__( 'huber_cost', name_prefix='huber_cost', parent_names=['input', 'label']) -if __name__ == '__main__': - pixel = data(name='pixel', type=data_type.dense_vector(784)) - label = data(name='label', type=data_type.integer_value(10)) - weight = data(name='weight', type=data_type.dense_vector(10)) - score = data(name='score', type=data_type.dense_vector(1)) +# convert projection +projection_list = [ + # [V1_method_name], all the parent_names is `input` + 'full_matrix_projection', + 'trans_full_matrix_projection', + 'table_projection', + 'scaling_projection', + 'dotmul_projection', + 'context_projection', + 'conv_projection', + 'identity_projection', +] +for prj in projection_list: + globals()[prj] = __convert_to_v2__(prj, parent_names=['input']) + +# convert operator +operator_list = [ + # [V1_method_name, parent_names], + ['dotmul_operator', ['a', 'b']], + ['conv_operator', ['img', 'filter']] +] +for op in operator_list: + globals()[op[0]] = __convert_to_v2__(op[0], parent_names=op[1]) + +def test_projection(): + """ + TODO: move to tests file + """ + input = data(name='data', type=data_type.dense_vector(784)) + word = data(name='word', type=data_type.integer_value_sequence(10000)) + fc0 = fc(input=input, size=100, act=conf_helps.SigmoidActivation()) + fc1 = fc(input=input, size=200, act=conf_helps.SigmoidActivation()) + mixed0 = mixed( + size=256, + input=[ + full_matrix_projection(input=fc0), full_matrix_projection(input=fc1) + ]) + with mixed(size=200) as mixed1: + mixed1 += 
full_matrix_projection(input=fc0) + mixed1 += identity_projection(input=fc1) + + table = table_projection(input=word) + emb0 = mixed(size=512, input=table) + with mixed(size=512) as emb1: + emb1 += table + + scale = scaling_projection(input=fc0) + scale0 = mixed(size=100, input=scale) + with mixed(size=100) as scale1: + scale1 += scale + + dotmul = dotmul_projection(input=fc0) + dotmul0 = mixed(size=100, input=dotmul) + with mixed(size=100) as dotmul1: + dotmul1 += dotmul + + context = context_projection(input=fc0, context_len=5) + context0 = mixed(size=100, input=context) + with mixed(size=100) as context1: + context1 += context + + conv = conv_projection( + input=input, + filter_size=1, + num_channels=1, + num_filters=128, + stride=1, + padding=0) + conv0 = mixed(input=conv, bias_attr=True) + with mixed(bias_attr=True) as conv1: + conv1 += conv + + print parse_network(mixed0) + print parse_network(mixed1) + print parse_network(emb0) + print parse_network(emb1) + print parse_network(scale0) + print parse_network(scale1) + print parse_network(dotmul0) + print parse_network(dotmul1) + print parse_network(conv0) + print parse_network(conv1) + + +def test_operator(): + """ + TODO: move to tests file + """ + ipt0 = data(name='data', type=data_type.dense_vector(784)) + ipt1 = data(name='word', type=data_type.dense_vector(128)) + fc0 = fc(input=ipt0, size=100, act=conf_helps.SigmoidActivation()) + fc1 = fc(input=ipt0, size=100, act=conf_helps.SigmoidActivation()) + + dotmul_op = dotmul_operator(a=fc0, b=fc1) + dotmul0 = mixed(input=dotmul_op) + with mixed() as dotmul1: + dotmul1 += dotmul_op + + conv = conv_operator( + img=ipt0, + filter=ipt1, + filter_size=1, + num_channels=1, + num_filters=128, + stride=1, + padding=0) + conv0 = mixed(input=conv) + with mixed() as conv1: + conv1 += conv + + print parse_network(dotmul0) + print parse_network(dotmul1) + print parse_network(conv0) + print parse_network(conv1) + + +def test_cost(pixel, label, weight, score): hidden = fc(input=pixel, size=100, act=activation.Sigmoid(), @@ -255,3 +466,14 @@ if __name__ == '__main__': print parse_network(cost5, cost6) print parse_network(cost7, cost8, cost9, cost10, cost11) print parse_network(inference, maxid) + + +if __name__ == '__main__': + pixel = data(name='pixel', type=data_type.dense_vector(784)) + label = data(name='label', type=data_type.integer_value(10)) + weight = data(name='weight', type=data_type.dense_vector(10)) + score = data(name='score', type=data_type.dense_vector(1)) + + test_cost(pixel, label, weight, score) + test_projection() + test_operator() From 9b73a602c4aa553888b450373eb31a5a9703c227 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Fri, 24 Feb 2017 19:23:24 +0800 Subject: [PATCH 25/35] remove comments --- python/paddle/v2/layer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 0e9e2a249d..618a220240 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -71,6 +71,7 @@ import collections import paddle.trainer_config_helpers as conf_helps from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ + from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.default_decorators import wrap_act_default from paddle.trainer_config_helpers.default_decorators import wrap_bias_attr_default @@ -80,8 +81,6 @@ import data_type import activation import attr -#import pudb;pudb.set_trace() - __all__ = [ 
'parse_network', 'data', From 4c85f955d78674a3803fab56c5dfd095bc6fde2b Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Fri, 24 Feb 2017 20:47:01 +0800 Subject: [PATCH 26/35] move test module --- python/paddle/v2/layer.py | 129 --------------------------- python/paddle/v2/tests/test_layer.py | 92 ++++++++++++++++++- 2 files changed, 90 insertions(+), 131 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 618a220240..a3fac6ca67 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -347,132 +347,3 @@ operator_list = [ ] for op in operator_list: globals()[op[0]] = __convert_to_v2__(op[0], parent_names=op[1]) - - -def test_projection(): - """ - TODO: move to tests file - """ - input = data(name='data', type=data_type.dense_vector(784)) - word = data(name='word', type=data_type.integer_value_sequence(10000)) - fc0 = fc(input=input, size=100, act=conf_helps.SigmoidActivation()) - fc1 = fc(input=input, size=200, act=conf_helps.SigmoidActivation()) - mixed0 = mixed( - size=256, - input=[ - full_matrix_projection(input=fc0), full_matrix_projection(input=fc1) - ]) - with mixed(size=200) as mixed1: - mixed1 += full_matrix_projection(input=fc0) - mixed1 += identity_projection(input=fc1) - - table = table_projection(input=word) - emb0 = mixed(size=512, input=table) - with mixed(size=512) as emb1: - emb1 += table - - scale = scaling_projection(input=fc0) - scale0 = mixed(size=100, input=scale) - with mixed(size=100) as scale1: - scale1 += scale - - dotmul = dotmul_projection(input=fc0) - dotmul0 = mixed(size=100, input=dotmul) - with mixed(size=100) as dotmul1: - dotmul1 += dotmul - - context = context_projection(input=fc0, context_len=5) - context0 = mixed(size=100, input=context) - with mixed(size=100) as context1: - context1 += context - - conv = conv_projection( - input=input, - filter_size=1, - num_channels=1, - num_filters=128, - stride=1, - padding=0) - conv0 = mixed(input=conv, bias_attr=True) - with mixed(bias_attr=True) as conv1: - conv1 += conv - - print parse_network(mixed0) - print parse_network(mixed1) - print parse_network(emb0) - print parse_network(emb1) - print parse_network(scale0) - print parse_network(scale1) - print parse_network(dotmul0) - print parse_network(dotmul1) - print parse_network(conv0) - print parse_network(conv1) - - -def test_operator(): - """ - TODO: move to tests file - """ - ipt0 = data(name='data', type=data_type.dense_vector(784)) - ipt1 = data(name='word', type=data_type.dense_vector(128)) - fc0 = fc(input=ipt0, size=100, act=conf_helps.SigmoidActivation()) - fc1 = fc(input=ipt0, size=100, act=conf_helps.SigmoidActivation()) - - dotmul_op = dotmul_operator(a=fc0, b=fc1) - dotmul0 = mixed(input=dotmul_op) - with mixed() as dotmul1: - dotmul1 += dotmul_op - - conv = conv_operator( - img=ipt0, - filter=ipt1, - filter_size=1, - num_channels=1, - num_filters=128, - stride=1, - padding=0) - conv0 = mixed(input=conv) - with mixed() as conv1: - conv1 += conv - - print parse_network(dotmul0) - print parse_network(dotmul1) - print parse_network(conv0) - print parse_network(conv1) - - -def test_cost(pixel, label, weight, score): - hidden = fc(input=pixel, - size=100, - act=activation.Sigmoid(), - param_attr=attr.Param(name='hidden')) - inference = fc(input=hidden, size=10, act=activation.Softmax()) - maxid = max_id(input=inference) - cost1 = classification_cost(input=inference, label=label) - cost2 = classification_cost(input=inference, label=label, weight=weight) - cost3 = cross_entropy_cost(input=inference, 
label=label) - cost4 = cross_entropy_with_selfnorm_cost(input=inference, label=label) - cost5 = regression_cost(input=inference, label=label) - cost6 = regression_cost(input=inference, label=label, weight=weight) - cost7 = multi_binary_label_cross_entropy_cost(input=inference, label=label) - cost8 = rank_cost(left=score, right=score, label=score) - cost9 = lambda_cost(input=inference, score=score) - cost10 = sum_cost(input=inference) - cost11 = huber_cost(input=score, label=label) - - print parse_network(cost1, cost2) - print parse_network(cost3, cost4) - print parse_network(cost5, cost6) - print parse_network(cost7, cost8, cost9, cost10, cost11) - print parse_network(inference, maxid) - - -if __name__ == '__main__': - pixel = data(name='pixel', type=data_type.dense_vector(784)) - label = data(name='label', type=data_type.integer_value(10)) - weight = data(name='weight', type=data_type.dense_vector(10)) - score = data(name='score', type=data_type.dense_vector(1)) - - test_cost(pixel, label, weight, score) - test_projection() - test_operator() diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index b600e8cf76..521bc8b40c 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -19,8 +19,6 @@ import paddle.v2.activation as activation import paddle.v2.attr as attr import paddle.v2.data_type as data_type import paddle.v2.layer as layer -from paddle.trainer_config_helpers.config_parser_utils import \ - parse_network_config as parse_network pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) label = layer.data(name='label', type=data_type.integer_value(10)) @@ -58,6 +56,96 @@ class CostLayerTest(unittest.TestCase): #print layer.parse_network(cost5, cost6) #print layer.parse_network(cost7, cost8, cost9, cost10, cost11) + def test_projection(self): + input = layer.data(name='data', type=data_type.dense_vector(784)) + word = layer.data( + name='word', type=data_type.integer_value_sequence(10000)) + fc0 = layer.fc(input=input, + size=100, + act=conf_helps.SigmoidActivation()) + fc1 = layer.fc(input=input, + size=200, + act=conf_helps.SigmoidActivation()) + mixed0 = layer.mixed( + size=256, + input=[ + layer.full_matrix_projection(input=fc0), + layer.full_matrix_projection(input=fc1) + ]) + with layer.mixed(size=200) as mixed1: + mixed1 += layer.full_matrix_projection(input=fc0) + mixed1 += layer.identity_projection(input=fc1) + + table = layer.table_projection(input=word) + emb0 = layer.mixed(size=512, input=table) + with layer.mixed(size=512) as emb1: + emb1 += table + + scale = layer.scaling_projection(input=fc0) + scale0 = layer.mixed(size=100, input=scale) + with layer.mixed(size=100) as scale1: + scale1 += scale + + dotmul = layer.dotmul_projection(input=fc0) + dotmul0 = layer.mixed(size=100, input=dotmul) + with layer.mixed(size=100) as dotmul1: + dotmul1 += dotmul + + context = layer.context_projection(input=fc0, context_len=5) + context0 = layer.mixed(size=100, input=context) + with layer.mixed(size=100) as context1: + context1 += context + + conv = layer.conv_projection( + input=input, + filter_size=1, + num_channels=1, + num_filters=128, + stride=1, + padding=0) + conv0 = layer.mixed(input=conv, bias_attr=True) + with layer.mixed(bias_attr=True) as conv1: + conv1 += conv + + print layer.parse_network(mixed0) + print layer.parse_network(mixed1) + print layer.parse_network(emb0) + print layer.parse_network(emb1) + print layer.parse_network(scale0) + print layer.parse_network(scale1) + print 
layer.parse_network(dotmul0)
+        print layer.parse_network(dotmul1)
+        print layer.parse_network(conv0)
+        print layer.parse_network(conv1)
+
 if __name__ == '__main__':
     unittest.main()

From bb7db754208a7484ced25eb879bd77e7f6fae6c9 Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Sat, 25 Feb 2017 10:07:15 +0800
Subject: [PATCH 27/35] add testing for duplicate item

---
 python/paddle/v2/tests/test_data_feeder.py | 23 +++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/python/paddle/v2/tests/test_data_feeder.py b/python/paddle/v2/tests/test_data_feeder.py
index 4d5df6e893..5f67da6a5b 100644
--- a/python/paddle/v2/tests/test_data_feeder.py
+++ b/python/paddle/v2/tests/test_data_feeder.py
@@ -176,7 +176,7 @@ class DataFeederTest(unittest.TestCase):
             self.assertEqual(output_sparse.getSparseRowCols(i), data[i][1])
             self.assertEqual(output_index[i], data[i][0])
 
-        # reader returns 3 featreus, but only use 2 features
+        # reader returns 3 features, but only use 2 features
         data_types = [('fea0', data_type.dense_vector(100)),
                       ('fea2', data_type.integer_value(10))]
         feeder = DataFeeder(data_types, {'fea0': 2, 'fea2': 0})
@@ -187,6 +187,27 @@ class DataFeederTest(unittest.TestCase):
             self.assertEqual(output_dense[i].all(), data[i][2].all())
             self.assertEqual(output_index[i], data[i][0])
 
+        # reader returns 3 features, one is duplicated
+        data_types = [('fea0', data_type.dense_vector(100)),
+                      ('fea1', data_type.sparse_binary_vector(20000)),
+                      ('fea2', data_type.integer_value(10)),
+                      ('fea3', data_type.dense_vector(100))]
+        feeder = DataFeeder(data_types,
+                            {'fea0': 2,
+                             'fea1': 1,
+                             'fea2': 0,
+                             'fea3': 2})
+        arg = feeder(data)
+        fea0 = arg.getSlotValue(0).copyToNumpyMat()
+        fea1 = arg.getSlotValue(1)
+        fea2 = arg.getSlotIds(2).copyToNumpyArray()
+        fea3 = arg.getSlotValue(3).copyToNumpyMat()
+        for i in xrange(batch_size):
+            self.assertEqual(fea0[i].all(), data[i][2].all())
+            self.assertEqual(fea1.getSparseRowCols(i), data[i][1])
+            self.assertEqual(fea2[i], data[i][0])
+            self.assertEqual(fea3[i].all(), data[i][2].all())
+
     def test_multiple_features_tuple(self):
         batch_size = 2
         data = []

From a6028d79dcaba69f6f95c7ebf9c12c33ad42b82e Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Mon, 27 Feb 2017 10:39:17 +0800
Subject: [PATCH 28/35] Clean mnist reader

---
 python/paddle/v2/data_set/mnist.py | 35 +++++++++--------------------
 1 file changed, 10 insertions(+), 25 deletions(-)

diff --git a/python/paddle/v2/data_set/mnist.py b/python/paddle/v2/data_set/mnist.py
index 6f35acf683..4b392af400 100644
--- a/python/paddle/v2/data_set/mnist.py
+++ b/python/paddle/v2/data_set/mnist.py
@@ -15,39 +15,24 @@ def __mnist_reader_creator__(data, target):
     return reader
 
-class MNIST(object):
-
""" - mnist dataset reader. The `train_reader` and `test_reader` method returns - a iterator of each sample. Each sample is combined by 784-dim float and a - one-dim label - """ +TEST_SIZE = 10000 - def __init__(self, random_state=0, test_size=10000, **options): - data = sklearn.datasets.mldata.fetch_mldata( - "MNIST original", data_home=DATA_HOME) - self.X_train, self.X_test, self.y_train, self.y_test = sklearn.model_selection.train_test_split( - data.data, - data.target, - test_size=test_size, - random_state=random_state, - **options) +data = sklearn.datasets.mldata.fetch_mldata( + "MNIST original", data_home=DATA_HOME) +X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + data.data, data.target, test_size=TEST_SIZE, random_state=0) - def train_creator(self): - return __mnist_reader_creator__(self.X_train, self.y_train) - def test_creator(self): - return __mnist_reader_creator__(self.X_test, self.y_test) +def train_creator(): + return __mnist_reader_creator__(X_train, y_train) -__default_instance__ = MNIST() -train_creator = __default_instance__.train_creator -test_creator = __default_instance__.test_creator +def test_creator(): + return __mnist_reader_creator__(X_test, y_test) def unittest(): - size = 12045 - mnist = MNIST(test_size=size) - assert len(list(mnist.test_creator()())) == size + assert len(list(test_creator()())) == TEST_SIZE if __name__ == '__main__': From 173a81b56b60567eb4e30f66331ebab4e004ead7 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 23 Feb 2017 16:01:20 +0800 Subject: [PATCH 29/35] Complete Event, Add Metric to Event. --- demo/mnist/api_train_v2.py | 14 ++++------ python/paddle/v2/event.py | 55 +++++++++++++++++++++++++++++++++---- python/paddle/v2/trainer.py | 22 +++++++++++++-- 3 files changed, 75 insertions(+), 16 deletions(-) diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py index 6fc01ce58b..6b0cfa0b05 100644 --- a/demo/mnist/api_train_v2.py +++ b/demo/mnist/api_train_v2.py @@ -27,19 +27,14 @@ def main(): cost = paddle.layer.classification_cost(input=inference, label=label) parameters = paddle.parameters.create(cost) - for param_name in parameters.keys(): - array = parameters.get(param_name) - array[:] = numpy.random.uniform(low=-1.0, high=1.0, size=array.shape) - parameters.set(parameter_name=param_name, value=array) adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01) def event_handler(event): if isinstance(event, paddle.event.EndIteration): - para = parameters.get('___fc_2__.w0') - print "Pass %d, Batch %d, Cost %f, Weight Mean Of Fc 2 is %f" % ( - event.pass_id, event.batch_id, event.cost, para.mean()) - + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) else: pass @@ -49,7 +44,8 @@ def main(): topology=cost, parameters=parameters, event_handler=event_handler, - batch_size=32, # batch size should be refactor in Data reader + num_passes=100, + batch_size=200, # batch size should be refactor in Data reader data_types={ # data_types will be removed, It should be in # network topology 'pixel': images.type, diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py index a16cfa91f0..835e28e621 100644 --- a/python/paddle/v2/event.py +++ b/python/paddle/v2/event.py @@ -3,8 +3,6 @@ All training events. There are: -* BeginTraining -* EndTraining * BeginIteration * EndIteration * BeginPass @@ -12,15 +10,62 @@ There are: TODO(yuyang18): Complete it! 
""" -__all__ = ['EndIteration'] +import py_paddle.swig_paddle as api +__all__ = ['EndIteration', 'BeginIteration', 'BeginPass', 'EndPass'] -class EndIteration(object): +class WithMetric(object): + def __init__(self, evaluator): + if not isinstance(evaluator, api.Evaluator): + raise TypeError("Evaluator should be api.Evaluator type") + self.__evaluator__ = evaluator + + @property + def metrics(self): + names = self.__evaluator__.getNames() + retv = dict() + for each_name in names: + val = self.__evaluator__.getValue(each_name) + retv[each_name] = val + return retv + + +class BeginPass(object): + """ + Event On One Pass Training Start. + """ + + def __init__(self, pass_id): + self.pass_id = pass_id + + +class EndPass(WithMetric): + """ + Event On One Pass Training Complete. + """ + + def __init__(self, pass_id, evaluator): + self.pass_id = pass_id + WithMetric.__init__(self, evaluator) + + +class BeginIteration(object): + """ + Event On One Batch Training Start. + """ + + def __init__(self, pass_id, batch_id): + self.pass_id = pass_id + self.batch_id = batch_id + + +class EndIteration(WithMetric): """ Event On One Batch Training Complete. """ - def __init__(self, pass_id, batch_id, cost): + def __init__(self, pass_id, batch_id, cost, evaluator): self.pass_id = pass_id self.batch_id = batch_id self.cost = cost + WithMetric.__init__(self, evaluator) diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 4365bd41e7..0acfcee2ce 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -96,11 +96,15 @@ class SGD(ITrainer): topology, api.CREATE_MODE_NORMAL, self.__optimizer__.enable_types()) assert isinstance(gm, api.GradientMachine) parameters.append_gradient_machine(gm) - + gm.randParameters() updater = self.__optimizer__.create_local_updater() updater.init(gm) gm.start() + batch_evaluator = gm.makeEvaluator() + assert isinstance(batch_evaluator, api.Evaluator) + pass_evaluator = gm.makeEvaluator() + assert isinstance(pass_evaluator, api.Evaluator) out_args = api.Arguments.createArguments(0) data_types_lists = [] @@ -112,12 +116,20 @@ class SGD(ITrainer): converter = DataProviderConverter(input_types=data_types_lists) for pass_id in xrange(num_passes): + event_handler(v2_event.BeginPass(pass_id)) + pass_evaluator.start() updater.startPass() for batch_id, data_batch in enumerate( __data_reader_to_batch__(train_data_reader, batch_size, topology)): + batch_evaluator.start() + event_handler( + v2_event.BeginIteration( + pass_id=pass_id, batch_id=batch_id)) pass_type = updater.startBatch(len(data_batch)) gm.forwardBackward(converter(data_batch), out_args, pass_type) + gm.eval(pass_evaluator) + gm.eval(batch_evaluator) for each_param in gm.getParameters(): updater.update(each_param) # Get cost. We use numpy to calculate total cost for this batch. 
@@ -125,11 +137,17 @@ class SGD(ITrainer): cost_vec = cost_vec.copyToNumpyMat() cost = cost_vec.sum() / len(data_batch) updater.finishBatch(cost) + batch_evaluator.finish() event_handler( v2_event.EndIteration( - pass_id=pass_id, batch_id=batch_id, cost=cost)) + pass_id=pass_id, + batch_id=batch_id, + cost=cost, + evaluator=batch_evaluator)) updater.finishPass() + pass_evaluator.finish() + event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator)) gm.finish() From 72c1327832b1b75057a19d98387546bb2f765ff4 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 27 Feb 2017 13:28:36 +0800 Subject: [PATCH 30/35] follow comments --- python/paddle/v2/data_feeder.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py index 74ee112f46..632f1760ab 100644 --- a/python/paddle/v2/data_feeder.py +++ b/python/paddle/v2/data_feeder.py @@ -62,8 +62,8 @@ class DataFeeder(DataProviderConverter): self.reader_dict = reader_dict for each in data_types: self.input_names.append(each[0]) - self.input_types.append(each[1]) assert isinstance(each[1], data_type.InputType) + self.input_types.append(each[1]) DataProviderConverter.__init__(self, self.input_types) def convert(self, dat, argument=None): @@ -88,24 +88,16 @@ class DataFeeder(DataProviderConverter): :type argument: swig_paddle.Arguments """ - if argument is None: - argument = swig_paddle.Arguments.createArguments(0) - assert isinstance(argument, swig_paddle.Arguments) - argument.resize(len(self.input_types)) - - scanners = [ - DataProviderConverter.create_scanner(i, each_type) - for i, each_type in enumerate(self.input_types) - ] - - for each_sample in dat: - for name, scanner in zip(self.input_names, scanners): - scanner.scan(each_sample[self.reader_dict[name]]) - - for scanner in scanners: - scanner.finish_scan(argument) + def reorder_data(data): + retv = [] + for each in data: + reorder = [] + for name in self.input_names: + reorder.append(each[self.reader_dict[name]]) + retv.append(reorder) + return retv - return argument + return DataProviderConverter.convert(self, reorder_data(dat), argument) def __call__(self, dat, argument=None): return self.convert(dat, argument) From aa92f0b673906edd3b585d5db645c0a3e075cc06 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 27 Feb 2017 10:32:30 +0800 Subject: [PATCH 31/35] follow comments --- python/paddle/v2/layer.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index a3fac6ca67..b9400ab26b 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -105,6 +105,13 @@ __all__ = [ 'conv_projection', ] +__projection_names__ = filter(lambda x: x.endswith('_projection'), + dir(conf_helps)) +__all__ += __projection_names__ + +__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps)) +__all__ += __operator_names__ + def parse_network(*outputs): """ @@ -224,8 +231,7 @@ class MixedLayerV2(Layer): """ class AddToSealedMixedLayerExceptionV2(Exception): - def __init__(self): - Exception.__init__(self) + pass def __init__(self, size=0, @@ -277,7 +283,7 @@ class MixedLayerV2(Layer): @wrap_name_default("mixed") -@wrap_act_default(act=conf_helps.LinearActivation()) +@wrap_act_default(act=activation.Linear()) @wrap_bias_attr_default(has_bias=False) @layer_support(conf_helps.layers.ERROR_CLIPPING, conf_helps.layers.DROPOUT) def mixed(size=0, From 5d7e7bc042e3118e9cf4a1899bc08dd44a1cf34c Mon 
Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 27 Feb 2017 13:37:53 +0800 Subject: [PATCH 32/35] add test_layer for v2 --- python/paddle/v2/layer.py | 126 --------------------------- python/paddle/v2/tests/test_layer.py | 122 ++++++++++++++++++++++++-- 2 files changed, 115 insertions(+), 133 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index f4a85e9d03..f0e4f972fe 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -74,9 +74,6 @@ from paddle.trainer_config_helpers.config_parser_utils import \ from paddle.trainer_config_helpers.default_decorators import wrap_name_default import data_type -import activation -import attr -import pooling __all__ = [ 'parse_network', 'data', 'fc', 'conv_shift', 'img_conv', 'img_pool', 'spp', @@ -277,126 +274,3 @@ layer_list = [ ] for l in layer_list: globals()[l[0]] = __convert_to_v2__(l[1], l[2]) - -if __name__ == '__main__': - pixel = data(name='pixel', type=data_type.dense_vector(128)) - label = data(name='label', type=data_type.integer_value(10)) - weight = data(name='weight', type=data_type.dense_vector(10)) - word = data(name='word', type=data_type.integer_value(12)) - score = data(name='score', type=data_type.dense_vector(1)) - - hidden = fc(input=pixel, - size=100, - act=activation.Sigmoid(), - param_attr=attr.Param(name='hidden')) - inference = fc(input=hidden, size=10, act=activation.Softmax()) - print parse_network(inference) - - # test conv layers - conv1 = conv_shift(a=pixel, b=score) - conv2 = img_conv( - input=pixel, - filter_size=1, - filter_size_y=1, - num_channels=8, - num_filters=16, - act=activation.Linear()) - print parse_network(conv1, conv2) - - # test image pooling layers - maxpool = img_pool( - input=conv2, - pool_size=2, - num_channels=16, - padding=1, - pool_type=pooling.Max()) - spp = spp(input=conv2, - pyramid_height=2, - num_channels=16, - pool_type=pooling.Max()) - maxout = maxout(input=conv2, num_channels=16, groups=4) - print parse_network(maxpool, spp, maxout) - - # test norm layers - norm1 = img_cmrnorm(input=maxpool, size=5) - norm2 = batch_norm(input=maxpool) - norm3 = sum_to_one_norm(input=maxpool) - print parse_network(norm1, norm2, norm3) - - # test recurrent layers - recurrent = recurrent(input=word) - lstm = lstmemory(input=word) - gru = grumemory(input=word) - print parse_network(recurrent, lstm, gru) - - # test aggregate layers - pool = pool( - input=pixel, - pooling_type=pooling.Avg(), - agg_level=AggregateLevel.EACH_SEQUENCE) - last_seq = last_seq(input=pixel) - first_seq = first_seq(input=pixel) - concat = concat(input=[last_seq, first_seq]) - seq_concat = seq_concat(a=last_seq, b=first_seq) - print parse_network(pool, last_seq, first_seq, concat, seq_concat) - - # test reshaping layers - block_expand = block_expand( - input=maxout, num_channels=4, stride_x=1, block_x=1) - expand = expand( - input=last_seq, expand_as=pixel, expand_level=ExpandLevel.FROM_TIMESTEP) - repeat = repeat(input=last_seq, num_repeats=4) - reshape = seq_reshape(input=last_seq, reshape_size=4) - rotate = rotate(input=pixel, height=16, width=49) - print parse_network(block_expand, expand, repeat, reshape, rotate) - - # test math layers - addto = addto(input=[last_seq, first_seq]) - linear_comb = linear_comb(weights=weight, vectors=hidden, size=10) - interpolation = interpolation(input=[hidden, hidden], weight=score) - bilinear = bilinear_interp(input=conv2, out_size_x=4, out_size_y=4) - power = power(input=conv1, weight=score) - scaling = scaling(input=conv1, weight=score) - slope = 
slope_intercept(input=conv1) - tensor = tensor(a=last_seq, b=first_seq, size=1000) - cos_sim = cos_sim(a=last_seq, b=first_seq) - trans = trans(input=tensor) - print parse_network(addto, linear_comb, interpolation, bilinear, power, - scaling, slope, tensor, cos_sim, trans) - - # test sampling layers - maxid = max_id(input=inference) - sampling_id = sampling_id(input=inference) - print parse_network(maxid, sampling_id) - - # test slicing and joining layers - pad = pad(input=maxpool, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1]) - print parse_network(pad) - - # test cost layers - cost1 = classification_cost(input=inference, label=label) - cost2 = classification_cost(input=inference, label=label, weight=weight) - cost3 = cross_entropy_cost(input=inference, label=label) - cost4 = cross_entropy_with_selfnorm_cost(input=inference, label=label) - cost5 = regression_cost(input=inference, label=label) - cost6 = regression_cost(input=inference, label=label, weight=weight) - cost7 = multi_binary_label_cross_entropy_cost(input=inference, label=label) - cost8 = rank_cost(left=score, right=score, label=score) - cost9 = lambda_cost(input=inference, score=score) - cost10 = sum_cost(input=inference) - cost11 = huber_cost(input=score, label=label) - print parse_network(cost3, cost4) - print parse_network(cost5, cost6) - print parse_network(cost7, cost8, cost9, cost10, cost11) - - crf = crf(input=inference, label=label) - crf_decoding = crf_decoding(input=inference, size=3) - ctc = ctc(input=inference, label=label) - warp_ctc = warp_ctc(input=pixel, label=label) - nce = nce(input=inference, label=label, num_classes=3) - hsigmoid = hsigmoid(input=inference, label=label, num_classes=3) - print parse_network(crf, crf_decoding, ctc, warp_ctc, nce, hsigmoid) - - # test check layers - eos = eos(input=maxid, eos_id=5) - print parse_network(eos) diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index b600e8cf76..2f139866e8 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -19,18 +19,106 @@ import paddle.v2.activation as activation import paddle.v2.attr as attr import paddle.v2.data_type as data_type import paddle.v2.layer as layer +import paddle.v2.pooling as pooling from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as parse_network -pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) +pixel = layer.data(name='pixel', type=data_type.dense_vector(128)) label = layer.data(name='label', type=data_type.integer_value(10)) weight = layer.data(name='weight', type=data_type.dense_vector(10)) score = layer.data(name='score', type=data_type.dense_vector(1)) + hidden = layer.fc(input=pixel, size=100, act=activation.Sigmoid(), param_attr=attr.Param(name='hidden')) inference = layer.fc(input=hidden, size=10, act=activation.Softmax()) +conv = layer.img_conv( + input=pixel, + filter_size=1, + filter_size_y=1, + num_channels=8, + num_filters=16, + act=activation.Linear()) + + +class ImageLayerTest(unittest.TestCase): + def test_conv_layer(self): + conv_shift = layer.conv_shift(a=pixel, b=score) + print layer.parse_network(conv, conv_shift) + + def test_pooling_layer(self): + maxpool = layer.img_pool( + input=conv, + pool_size=2, + num_channels=16, + padding=1, + pool_type=pooling.Max()) + spp = layer.spp(input=conv, + pyramid_height=2, + num_channels=16, + pool_type=pooling.Max()) + maxout = layer.maxout(input=conv, num_channels=16, groups=4) + print layer.parse_network(maxpool, spp, maxout) + + def 
test_norm_layer(self): + norm1 = layer.img_cmrnorm(input=conv, size=5) + norm2 = layer.batch_norm(input=conv) + norm3 = layer.sum_to_one_norm(input=conv) + print layer.parse_network(norm1, norm2, norm3) + + +class AggregateLayerTest(unittest.TestCase): + def test_aggregate_layer(self): + pool = layer.pool( + input=pixel, + pooling_type=pooling.Avg(), + agg_level=layer.AggregateLevel.EACH_SEQUENCE) + last_seq = layer.last_seq(input=pixel) + first_seq = layer.first_seq(input=pixel) + concat = layer.concat(input=[last_seq, first_seq]) + seq_concat = layer.seq_concat(a=last_seq, b=first_seq) + print layer.parse_network(pool, last_seq, first_seq, concat, seq_concat) + + +class MathLayerTest(unittest.TestCase): + def test_math_layer(self): + addto = layer.addto(input=[pixel, pixel]) + linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10) + interpolation = layer.interpolation( + input=[hidden, hidden], weight=score) + bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4) + power = layer.power(input=pixel, weight=score) + scaling = layer.scaling(input=pixel, weight=score) + slope = layer.slope_intercept(input=pixel) + tensor = layer.tensor(a=pixel, b=pixel, size=1000) + cos_sim = layer.cos_sim(a=pixel, b=pixel) + trans = layer.trans(input=tensor) + print layer.parse_network(addto, linear_comb, interpolation, power, + scaling, slope, tensor, cos_sim, trans) + + +class ReshapeLayerTest(unittest.TestCase): + def test_reshape_layer(self): + block_expand = layer.block_expand( + input=conv, num_channels=4, stride_x=1, block_x=1) + expand = layer.expand( + input=weight, + expand_as=pixel, + expand_level=layer.ExpandLevel.FROM_TIMESTEP) + repeat = layer.repeat(input=pixel, num_repeats=4) + reshape = layer.seq_reshape(input=pixel, reshape_size=4) + rotate = layer.rotate(input=pixel, height=16, width=49) + print layer.parse_network(block_expand, expand, repeat, reshape, rotate) + + +class RecurrentLayerTest(unittest.TestCase): + def test_recurrent_layer(self): + word = layer.data(name='word', type=data_type.integer_value(12)) + recurrent = layer.recurrent(input=word) + lstm = layer.lstmemory(input=word) + gru = layer.grumemory(input=word) + print layer.parse_network(recurrent, lstm, gru) class CostLayerTest(unittest.TestCase): @@ -51,12 +139,32 @@ class CostLayerTest(unittest.TestCase): cost10 = layer.sum_cost(input=inference) cost11 = layer.huber_cost(input=score, label=label) - print dir(layer) - layer.parse_network(cost1, cost2) - print dir(layer) - #print layer.parse_network(cost3, cost4) - #print layer.parse_network(cost5, cost6) - #print layer.parse_network(cost7, cost8, cost9, cost10, cost11) + print layer.parse_network(cost1, cost2) + print layer.parse_network(cost3, cost4) + print layer.parse_network(cost5, cost6) + print layer.parse_network(cost7, cost8, cost9, cost10, cost11) + + crf = layer.crf(input=inference, label=label) + crf_decoding = layer.crf_decoding(input=inference, size=3) + ctc = layer.ctc(input=inference, label=label) + warp_ctc = layer.warp_ctc(input=pixel, label=label) + nce = layer.nce(input=inference, label=label, num_classes=3) + hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3) + + print layer.parse_network(crf, crf_decoding, ctc, warp_ctc, nce, + hsigmoid) + + +class OtherLayerTest(unittest.TestCase): + def test_sampling_layer(self): + maxid = layer.max_id(input=inference) + sampling_id = layer.sampling_id(input=inference) + eos = layer.eos(input=maxid, eos_id=5) + print layer.parse_network(maxid, sampling_id, eos) + 
+ def test_slicing_joining_layer(self): + pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1]) + print layer.parse_network(pad) if __name__ == '__main__': From bc074d0e581bb0804d11246c3d87ccaa1a5abc50 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 27 Feb 2017 13:43:41 +0800 Subject: [PATCH 33/35] minor change --- python/paddle/v2/data_feeder.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py index 632f1760ab..2a16d46dda 100644 --- a/python/paddle/v2/data_feeder.py +++ b/python/paddle/v2/data_feeder.py @@ -58,13 +58,13 @@ class DataFeeder(DataProviderConverter): :type reader_dict: dict() """ self.input_names = [] - self.input_types = [] + input_types = [] self.reader_dict = reader_dict for each in data_types: self.input_names.append(each[0]) assert isinstance(each[1], data_type.InputType) - self.input_types.append(each[1]) - DataProviderConverter.__init__(self, self.input_types) + input_types.append(each[1]) + DataProviderConverter.__init__(self, input_types) def convert(self, dat, argument=None): """ @@ -98,6 +98,3 @@ class DataFeeder(DataProviderConverter): return retv return DataProviderConverter.convert(self, reorder_data(dat), argument) - - def __call__(self, dat, argument=None): - return self.convert(dat, argument) From b59f35ef7ea97b74cc500bf5b28aed410c64e941 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 27 Feb 2017 14:22:49 +0800 Subject: [PATCH 34/35] follow some comments --- python/paddle/v2/layer.py | 61 ++++++++-------------------- python/paddle/v2/tests/test_layer.py | 14 +++---- 2 files changed, 21 insertions(+), 54 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index b9400ab26b..e7d986d4c4 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -82,27 +82,10 @@ import activation import attr __all__ = [ - 'parse_network', - 'data', - 'fc', - 'max_id', - 'classification_cost', - 'cross_entropy_cost', - 'cross_entropy_with_selfnorm_cost', - 'regression_cost', - 'multi_binary_label_cross_entropy_cost', - 'rank_cost', - 'lambda_cost', - 'sum_cost', - 'huber_cost' - 'full_matrix_projection', - 'trans_full_matrix_projection', - 'table_projection', - 'identity_projection', - 'scaling_projection', - 'dotmul_projection', - 'context_projection', - 'conv_projection', + 'parse_network', 'data', 'fc', 'max_id', 'classification_cost', + 'cross_entropy_cost', 'cross_entropy_with_selfnorm_cost', 'regression_cost', + 'multi_binary_label_cross_entropy_cost', 'rank_cost', 'lambda_cost', + 'sum_cost', 'huber_cost' ] __projection_names__ = filter(lambda x: x.endswith('_projection'), @@ -167,7 +150,7 @@ def __convert_to_v2__(method_name, name_prefix=None, parent_names=None): wrapper = None class V2LayerImpl(Layer): - def __init__(self, name=None, **kwargs): + def __init__(self, **kwargs): parent_layers = dict() other_kwargs = dict() for pname in parent_names: @@ -178,6 +161,7 @@ def __convert_to_v2__(method_name, name_prefix=None, parent_names=None): if key not in parent_names: other_kwargs[key] = kwargs[key] + name = kwargs['name'] if kwargs.has_key('name') else None super(V2LayerImpl, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs @@ -242,32 +226,30 @@ class MixedLayerV2(Layer): layer_attr=None): self.__method_name__ = 'mixed_layer' self.finalized = False - - self.__parent_layers__ = dict() - other_kwargs = dict() - self.input_name = 'input' - self.__parent_layers__[self.input_name] = [] + 
self.__inputs__ = [] if input is not None: - self.__parent_layers__[self.input_name] = input + self.__inputs__ = input - self.name = name + other_kwargs = dict() + other_kwargs['name'] = name other_kwargs['size'] = size other_kwargs['act'] = act other_kwargs['bias_attr'] = bias_attr other_kwargs['layer_attr'] = layer_attr - Layer.__init__(self, name, self.__parent_layers__) + parent_layers = {"input": self.__inputs__} + super(MixedLayerV2, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs def __iadd__(self, other): if not self.finalized: - self.__parent_layers__[self.input_name].append(other) + self.__inputs__.append(other) return self else: raise MixedLayerTypeV2.AddToSealedMixedLayerExceptionV2() def __enter__(self): - assert len(self.__parent_layers__[self.input_name]) == 0 + assert len(self.__inputs__) == 0 return self def __exit__(self, *args, **kwargs): @@ -279,7 +261,7 @@ class MixedLayerV2(Layer): args[each] = kwargs[each] for each in self.__other_kwargs__: args[each] = self.__other_kwargs__[each] - return getattr(conf_helps, self.__method_name__)(name=self.name, **args) + return getattr(conf_helps, self.__method_name__)(**args) @wrap_name_default("mixed") @@ -331,18 +313,7 @@ huber_cost = __convert_to_v2__( 'huber_cost', name_prefix='huber_cost', parent_names=['input', 'label']) # convert projection -projection_list = [ - # [V1_method_name], all the parent_names is `input` - 'full_matrix_projection', - 'trans_full_matrix_projection', - 'table_projection', - 'scaling_projection', - 'dotmul_projection', - 'context_projection', - 'conv_projection', - 'identity_projection', -] -for prj in projection_list: +for prj in __projection_names__: globals()[prj] = __convert_to_v2__(prj, parent_names=['input']) # convert operator diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index 521bc8b40c..77423e4aa1 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -32,7 +32,7 @@ inference = layer.fc(input=hidden, size=10, act=activation.Softmax()) class CostLayerTest(unittest.TestCase): - def test_cost_layer(self): + def not_test_cost_layer(self): cost1 = layer.classification_cost(input=inference, label=label) cost2 = layer.classification_cost( input=inference, label=label, weight=weight) @@ -60,12 +60,8 @@ class CostLayerTest(unittest.TestCase): input = layer.data(name='data', type=data_type.dense_vector(784)) word = layer.data( name='word', type=data_type.integer_value_sequence(10000)) - fc0 = layer.fc(input=input, - size=100, - act=conf_helps.SigmoidActivation()) - fc1 = layer.fc(input=input, - size=200, - act=conf_helps.SigmoidActivation()) + fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid()) + fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid()) mixed0 = layer.mixed( size=256, input=[ @@ -121,8 +117,8 @@ class CostLayerTest(unittest.TestCase): def test_operator(self): ipt0 = layer.data(name='data', type=data_type.dense_vector(784)) ipt1 = layer.data(name='word', type=data_type.dense_vector(128)) - fc0 = layer.fc(input=ipt0, size=100, act=conf_helps.SigmoidActivation()) - fc1 = layer.fc(input=ipt0, size=100, act=conf_helps.SigmoidActivation()) + fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid()) + fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid()) dotmul_op = layer.dotmul_operator(a=fc0, b=fc1) dotmul0 = layer.mixed(input=dotmul_op) From d25173c0248a2e62db250ba481297c2c01ce1605 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 27 Feb 2017 
16:30:53 +0800 Subject: [PATCH 35/35] follow comments --- python/paddle/v2/layer.py | 6 +++--- python/paddle/v2/tests/test_layer.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index e7d986d4c4..d5fe06542b 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -134,9 +134,9 @@ class Layer(object): if self.name is None: return self.to_proto_impl(**kwargs) - - if self.name not in context: + elif self.name not in context: context[self.name] = self.to_proto_impl(**kwargs) + return context[self.name] def to_proto_impl(self, **kwargs): @@ -161,7 +161,7 @@ def __convert_to_v2__(method_name, name_prefix=None, parent_names=None): if key not in parent_names: other_kwargs[key] = kwargs[key] - name = kwargs['name'] if kwargs.has_key('name') else None + name = kwargs.get('name', None) super(V2LayerImpl, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index 77423e4aa1..bf1c344202 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -32,7 +32,7 @@ inference = layer.fc(input=hidden, size=10, act=activation.Softmax()) class CostLayerTest(unittest.TestCase): - def not_test_cost_layer(self): + def test_cost_layer(self): cost1 = layer.classification_cost(input=inference, label=label) cost2 = layer.classification_cost( input=inference, label=label, weight=weight)
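
Patch 35 above settles the caching contract of Layer.to_proto: a layer without a name is rebuilt on every call, while a named layer is built once per parse and then shared through the `context` dict. The following standalone sketch (plain Python 2; the `Node` class and its `build` method are illustrative stand-ins, not PaddlePaddle API) shows the same contract in isolation:

class Node(object):
    def __init__(self, name=None, parents=None):
        self.name = name
        self.parents = parents or {}

    def build(self, **kwargs):
        # stand-in for to_proto_impl: just record the wired-in parent keys
        return (self.name, sorted(kwargs.keys()))

    def to_proto(self, context):
        # resolve parents first, as Layer.to_proto does for __parent_layers__
        kwargs = dict((k, v.to_proto(context))
                      for k, v in self.parents.items())
        if self.name is None:
            return self.build(**kwargs)
        elif self.name not in context:
            context[self.name] = self.build(**kwargs)
        return context[self.name]

fc1 = Node(name='fc1')
cost = Node(name='cost', parents={'input': fc1, 'label': Node(name='label')})
context = {}
print cost.to_proto(context)   # ('cost', ['input', 'label'])
print sorted(context.keys())   # ['cost', 'fc1', 'label']

Because named nodes are memoized in `context`, a layer referenced by several outputs is emitted only once per parse, which is what lets parse_network accept multiple outputs of a shared topology.
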