From 55b96287aeb08305b90d3351c602c207d9c38290 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 23 Feb 2017 22:45:51 +0800 Subject: [PATCH 01/31] support rnn --- python/paddle/v2/layer.py | 87 +++++++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 8 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 507725ee4f..bebe7c6690 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -73,6 +73,7 @@ from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ from paddle.trainer_config_helpers.default_decorators import wrap_name_default +import activation import data_type __all__ = [ @@ -97,10 +98,11 @@ def parse_network(*outputs): class Layer(object): - def __init__(self, name, parent_layers): + def __init__(self, name, parent_layers, step_input=None): assert isinstance(parent_layers, dict) assert isinstance(name, basestring) self.name = name + self.step_input = step_input self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -116,8 +118,14 @@ class Layer(object): else: v1_layer = map(lambda x: x.to_proto(context=context), self.__parent_layers__[layer_name]) + if layer_name == "input" and self.step_input is not None: + v1_layer.insert(0, self.step_input) kwargs[layer_name] = v1_layer + # memory may have the same name with some layer + if isinstance(self, MemoryV2): + return self.to_proto_impl(**kwargs) + if self.name not in context: context[self.name] = self.to_proto_impl(**kwargs) return context[self.name] @@ -133,7 +141,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names): wrapper = None class V2LayerImpl(Layer): - def __init__(self, name=None, **kwargs): + def __init__(self, name=None, step_input=None, **kwargs): parent_layers = dict() other_kwargs = dict() for pname in parent_names: @@ -143,7 +151,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names): if key not in parent_names: other_kwargs[key] = kwargs[key] - super(V2LayerImpl, self).__init__(name, parent_layers) + super(V2LayerImpl, self).__init__(name, parent_layers, step_input) self.__other_kwargs__ = other_kwargs if wrapper is not None: @@ -186,6 +194,22 @@ class DataLayerV2(Layer): return getattr(conf_helps, self.__method_name__)(name=self.name, **args) +class MemoryV2(Layer): + def __init__(self, name, size, **kwargs): + self.name = name + self.size = size + self.__kwargs__ = kwargs + super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + + def to_proto_impl(self, **kwargs): + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__kwargs__: + args[each] = self.__kwargs__[each] + return conf_helps.memory(name=self.name, size=self.size, **args) + + data = DataLayerV2 fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input']) max_id = __convert_to_v2__( @@ -198,6 +222,13 @@ cross_entropy_cost = __convert_to_v2__( 'cross_entropy', name_prefix='cross_entropy', parent_names=['input', 'label']) +embedding = __convert_to_v2__( + 'embedding_layer', name_prefix='embedding', parent_names=['input']) +last_seq = __convert_to_v2__( + 'last_seq', name_prefix='last_seq', parent_names=['input']) +recurrent_group = __convert_to_v2__( + 'recurrent_group', name_prefix='recurrent_layer', parent_names=['input']) +memory = MemoryV2 if __name__ == '__main__': pixel = data(name='pixel', type=data_type.dense_vector(784)) @@ -208,8 +239,48 @@ if __name__ == '__main__': cost1 = classification_cost(input=inference, label=label) cost2 = 
cross_entropy_cost(input=inference, label=label) - print parse_network(cost1) - print parse_network(cost2) - print parse_network(cost1, cost2) - print parse_network(cost2) - print parse_network(inference, maxid) + mem = memory(name="rnn_state", size=10) + + # print parse_network(cost1) + # print parse_network(cost2) + # print parse_network(cost1, cost2) + # print parse_network(cost2) + # print parse_network(inference, maxid) + + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + label_dim = 3 + + def step(y): + mem = conf_helps.memory(name="rnn_state", size=hidden_dim) + out = conf_helps.fc_layer( + input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + def test(): + data1 = conf_helps.data_layer(name="word", size=dict_dim) + embd = conf_helps.embedding_layer(input=data1, size=word_dim) + conf_helps.recurrent_group(name="rnn", step=step, input=embd) + + # print __parse__(test) + + # yyyyyyyy + def new_step(y): + mem = memory(name="rnn_state", size=hidden_dim) + out = fc(input=[mem], + step_input=y, + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out.to_proto(dict()) + + data1 = data(name="word", type=data_type.integer_value(dict_dim)) + embd = embedding(input=data1, size=word_dim) + aaa = recurrent_group(name="rnn", step=new_step, input=embd) + print parse_network(aaa) From 92f52e3bb7a1a203a01d3641887c6bdfd03dce67 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 24 Feb 2017 13:46:59 +0800 Subject: [PATCH 02/31] add rnn test --- demo/mnist/api_train_v2.py | 3 ++ python/paddle/v2/layer.py | 43 +---------------- python/paddle/v2/tests/layer_test.py | 72 ++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 41 deletions(-) create mode 100644 python/paddle/v2/tests/layer_test.py diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py index 6fc01ce58b..5e66b7399a 100644 --- a/demo/mnist/api_train_v2.py +++ b/demo/mnist/api_train_v2.py @@ -3,6 +3,9 @@ import paddle.v2 as paddle import mnist_util +import pudb +pudb.set_trace() + def train_reader(): train_file = './data/raw_data/train' diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 44ebebcaea..e1952ce747 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -73,16 +73,15 @@ from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ from paddle.trainer_config_helpers.default_decorators import wrap_name_default -import activation -import data_type import activation import attr +import data_type __all__ = [ 'parse_network', 'data', 'fc', 'max_id', 'classification_cost', 'cross_entropy_cost', 'cross_entropy_with_selfnorm_cost', 'regression_cost', 'multi_binary_label_cross_entropy_cost', 'rank_cost', 'lambda_cost', - 'sum_cost', 'huber_cost' + 'sum_cost', 'huber_cost', 'memory', 'embedding', 'recurrent_group' ] @@ -294,41 +293,3 @@ if __name__ == '__main__': print parse_network(cost5, cost6) print parse_network(cost7, cost8, cost9, cost10, cost11) print parse_network(inference, maxid) - - dict_dim = 10 - word_dim = 8 - hidden_dim = 8 - label_dim = 3 - - def step(y): - mem = conf_helps.memory(name="rnn_state", size=hidden_dim) - out = conf_helps.fc_layer( - input=[y, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - def test(): - data1 = conf_helps.data_layer(name="word", size=dict_dim) - embd = conf_helps.embedding_layer(input=data1, size=word_dim) - conf_helps.recurrent_group(name="rnn", 
step=step, input=embd) - - # print __parse__(test) - - # yyyyyyyy - def new_step(y): - mem = memory(name="rnn_state", size=hidden_dim) - out = fc(input=[mem], - step_input=y, - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out.to_proto(dict()) - - data1 = data(name="word", type=data_type.integer_value(dict_dim)) - embd = embedding(input=data1, size=word_dim) - aaa = recurrent_group(name="rnn", step=new_step, input=embd) - print parse_network(aaa) diff --git a/python/paddle/v2/tests/layer_test.py b/python/paddle/v2/tests/layer_test.py new file mode 100644 index 0000000000..87e601a60a --- /dev/null +++ b/python/paddle/v2/tests/layer_test.py @@ -0,0 +1,72 @@ +# Copyright PaddlePaddle contributors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import difflib +import unittest + +import paddle.trainer_config_helpers as conf_helps +import paddle.v2.activation as activation +import paddle.v2.data_type as data_type +import paddle.v2.layer as layer +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as parse_network + + +class RNNTest(unittest.TestCase): + def test_simple_rnn(self): + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + + def test_old_rnn(): + def step(y): + mem = conf_helps.memory(name="rnn_state", size=hidden_dim) + out = conf_helps.fc_layer( + input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + def test(): + data1 = conf_helps.data_layer(name="word", size=dict_dim) + embd = conf_helps.embedding_layer(input=data1, size=word_dim) + conf_helps.recurrent_group(name="rnn", step=step, input=embd) + + return str(parse_network(test)) + + def test_new_rnn(): + def new_step(y): + mem = layer.memory(name="rnn_state", size=hidden_dim) + out = layer.fc(input=[mem], + step_input=y, + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out.to_proto(dict()) + + data1 = layer.data( + name="word", type=data_type.integer_value(dict_dim)) + embd = layer.embedding(input=data1, size=word_dim) + aaa = layer.recurrent_group(name="rnn", step=new_step, input=embd) + return str(layer.parse_network(aaa)) + + diff = difflib.unified_diff(test_old_rnn().splitlines(1), + test_new_rnn().splitlines(1)) + print ''.join(diff) + + +if __name__ == '__main__': + unittest.main() From 6b80c2b4f9a626efa911f715dcb45bee99d80729 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 24 Feb 2017 15:29:11 +0800 Subject: [PATCH 03/31] add cost test --- python/paddle/v2/layer.py | 2 -- python/paddle/v2/tests/layer_test.py | 35 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index e1952ce747..f333c0af96 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -281,8 +281,6 @@ if __name__ == '__main__': cost10 = sum_cost(input=inference) cost11 = huber_cost(input=score, label=label) - mem = memory(name="rnn_state", size=10) - # 
print parse_network(cost1) # print parse_network(cost2) # print parse_network(cost1, cost2) diff --git a/python/paddle/v2/tests/layer_test.py b/python/paddle/v2/tests/layer_test.py index 87e601a60a..6c4b458914 100644 --- a/python/paddle/v2/tests/layer_test.py +++ b/python/paddle/v2/tests/layer_test.py @@ -18,10 +18,45 @@ import paddle.trainer_config_helpers as conf_helps import paddle.v2.activation as activation import paddle.v2.data_type as data_type import paddle.v2.layer as layer +import paddle.v2.attr as attr from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as parse_network +class CostLyaerTest(unittest.TestCase): + def test_cost_layer(self): + pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) + label = layer.data(name='label', type=data_type.integer_value(10)) + weight = layer.data(name='weight', type=data_type.dense_vector(10)) + score = layer.data(name='score', type=data_type.dense_vector(1)) + hidden = layer.fc(input=pixel, + size=100, + act=activation.Sigmoid(), + param_attr=attr.Param(name='hidden')) + inference = layer.fc(input=hidden, size=10, act=activation.Softmax()) + + cost1 = layer.classification_cost(input=inference, label=label) + cost2 = layer.classification_cost( + input=inference, label=label, weight=weight) + cost3 = layer.cross_entropy_cost(input=inference, label=label) + cost4 = layer.cross_entropy_with_selfnorm_cost( + input=inference, label=label) + cost5 = layer.regression_cost(input=inference, label=label) + cost6 = layer.regression_cost( + input=inference, label=label, weight=weight) + cost7 = layer.multi_binary_label_cross_entropy_cost( + input=inference, label=label) + cost8 = layer.rank_cost(left=score, right=score, label=score) + cost9 = layer.lambda_cost(input=inference, score=score) + cost10 = layer.sum_cost(input=inference) + cost11 = layer.huber_cost(input=score, label=label) + + print layer.parse_network(cost1, cost2) + print layer.parse_network(cost3, cost4) + print layer.parse_network(cost5, cost6) + print layer.parse_network(cost7, cost8, cost9, cost10, cost11) + + class RNNTest(unittest.TestCase): def test_simple_rnn(self): dict_dim = 10 From db92e3c884a586d0f28dcc7c7e3be99c1e6203f6 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 24 Feb 2017 16:04:59 +0800 Subject: [PATCH 04/31] refine code --- python/paddle/v2/layer.py | 35 ---------------------------- python/paddle/v2/tests/layer_test.py | 7 +++--- 2 files changed, 4 insertions(+), 38 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index f333c0af96..5ecc96c685 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -256,38 +256,3 @@ sum_cost = __convert_to_v2__( 'sum_cost', name_prefix='sum_cost', parent_names=['input']) huber_cost = __convert_to_v2__( 'huber_cost', name_prefix='huber_cost', parent_names=['input', 'label']) - -if __name__ == '__main__': - pixel = data(name='pixel', type=data_type.dense_vector(784)) - label = data(name='label', type=data_type.integer_value(10)) - weight = data(name='weight', type=data_type.dense_vector(10)) - score = data(name='score', type=data_type.dense_vector(1)) - - hidden = fc(input=pixel, - size=100, - act=activation.Sigmoid(), - param_attr=attr.Param(name='hidden')) - inference = fc(input=hidden, size=10, act=activation.Softmax()) - maxid = max_id(input=inference) - cost1 = classification_cost(input=inference, label=label) - cost2 = classification_cost(input=inference, label=label, weight=weight) - cost3 = cross_entropy_cost(input=inference, 
label=label) - cost4 = cross_entropy_with_selfnorm_cost(input=inference, label=label) - cost5 = regression_cost(input=inference, label=label) - cost6 = regression_cost(input=inference, label=label, weight=weight) - cost7 = multi_binary_label_cross_entropy_cost(input=inference, label=label) - cost8 = rank_cost(left=score, right=score, label=score) - cost9 = lambda_cost(input=inference, score=score) - cost10 = sum_cost(input=inference) - cost11 = huber_cost(input=score, label=label) - - # print parse_network(cost1) - # print parse_network(cost2) - # print parse_network(cost1, cost2) - # print parse_network(cost2) - # print parse_network(inference, maxid) - print parse_network(cost1, cost2) - print parse_network(cost3, cost4) - print parse_network(cost5, cost6) - print parse_network(cost7, cost8, cost9, cost10, cost11) - print parse_network(inference, maxid) diff --git a/python/paddle/v2/tests/layer_test.py b/python/paddle/v2/tests/layer_test.py index 6c4b458914..2958cbd9eb 100644 --- a/python/paddle/v2/tests/layer_test.py +++ b/python/paddle/v2/tests/layer_test.py @@ -16,9 +16,9 @@ import unittest import paddle.trainer_config_helpers as conf_helps import paddle.v2.activation as activation +import paddle.v2.attr as attr import paddle.v2.data_type as data_type import paddle.v2.layer as layer -import paddle.v2.attr as attr from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as parse_network @@ -95,8 +95,9 @@ class RNNTest(unittest.TestCase): data1 = layer.data( name="word", type=data_type.integer_value(dict_dim)) embd = layer.embedding(input=data1, size=word_dim) - aaa = layer.recurrent_group(name="rnn", step=new_step, input=embd) - return str(layer.parse_network(aaa)) + rnn_layer = layer.recurrent_group( + name="rnn", step=new_step, input=embd) + return str(layer.parse_network(rnn_layer)) diff = difflib.unified_diff(test_old_rnn().splitlines(1), test_new_rnn().splitlines(1)) From e4327a7cd9408839900c0f82b4aedf2ce6672cbd Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 24 Feb 2017 16:11:02 +0800 Subject: [PATCH 05/31] add CMakeLists.txt --- python/paddle/v2/tests/CMakeLists.txt | 4 ++++ python/paddle/v2/tests/layer_test.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 python/paddle/v2/tests/CMakeLists.txt diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt new file mode 100644 index 0000000000..dc5efdab6a --- /dev/null +++ b/python/paddle/v2/tests/CMakeLists.txt @@ -0,0 +1,4 @@ +add_test(NAME layer_test + COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/layer_test.py + WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) diff --git a/python/paddle/v2/tests/layer_test.py b/python/paddle/v2/tests/layer_test.py index 2958cbd9eb..83c8c26d6b 100644 --- a/python/paddle/v2/tests/layer_test.py +++ b/python/paddle/v2/tests/layer_test.py @@ -23,7 +23,7 @@ from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as parse_network -class CostLyaerTest(unittest.TestCase): +class CostLayerTest(unittest.TestCase): def test_cost_layer(self): pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) label = layer.data(name='label', type=data_type.integer_value(10)) From f13f1f1ce5cfe428c272e90f85dc9a9c1ed55f6b Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 24 Feb 2017 17:37:38 +0800 Subject: [PATCH 06/31] use test_layer instead of layer_test --- python/paddle/v2/tests/test_layer.py | 57 
+++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 6 deletions(-)

diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py
index b600e8cf76..73d769a358 100644
--- a/python/paddle/v2/tests/test_layer.py
+++ b/python/paddle/v2/tests/test_layer.py
@@ -51,12 +51,57 @@ class CostLayerTest(unittest.TestCase):
         cost10 = layer.sum_cost(input=inference)
         cost11 = layer.huber_cost(input=score, label=label)

-        print dir(layer)
-        layer.parse_network(cost1, cost2)
-        print dir(layer)
-        #print layer.parse_network(cost3, cost4)
-        #print layer.parse_network(cost5, cost6)
-        #print layer.parse_network(cost7, cost8, cost9, cost10, cost11)
+        print layer.parse_network(cost1, cost2)
+        print layer.parse_network(cost3, cost4)
+        print layer.parse_network(cost5, cost6)
+        print layer.parse_network(cost7, cost8, cost9, cost10, cost11)
+
+
+class RNNTest(unittest.TestCase):
+    def test_simple_rnn(self):
+        dict_dim = 10
+        word_dim = 8
+        hidden_dim = 8
+
+        def test_old_rnn():
+            def step(y):
+                mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
+                out = conf_helps.fc_layer(
+                    input=[y, mem],
+                    size=hidden_dim,
+                    act=activation.Tanh(),
+                    bias_attr=True,
+                    name="rnn_state")
+                return out
+
+            def test():
+                data1 = conf_helps.data_layer(name="word", size=dict_dim)
+                embd = conf_helps.embedding_layer(input=data1, size=word_dim)
+                conf_helps.recurrent_group(name="rnn", step=step, input=embd)
+
+            return str(parse_network(test))
+
+        def test_new_rnn():
+            def new_step(y):
+                mem = layer.memory(name="rnn_state", size=hidden_dim)
+                out = layer.fc(input=[mem],
+                               step_input=y,
+                               size=hidden_dim,
+                               act=activation.Tanh(),
+                               bias_attr=True,
+                               name="rnn_state")
+                return out.to_proto(dict())
+
+            data1 = layer.data(
+                name="word", type=data_type.integer_value(dict_dim))
+            embd = layer.embedding(input=data1, size=word_dim)
+            rnn_layer = layer.recurrent_group(
+                name="rnn", step=new_step, input=embd)
+            return str(layer.parse_network(rnn_layer))
+
+        diff = difflib.unified_diff(test_old_rnn().splitlines(1),
+                                    test_new_rnn().splitlines(1))
+        print ''.join(diff)


 if __name__ == '__main__':

From ad4ab5ac811d90dd2bbb661ad34ba5ee3aa510a1 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Sun, 26 Feb 2017 16:29:02 +0800
Subject: [PATCH 07/31] remove step_input from the recurrent_group step

---
 .../paddle/trainer_config_helpers/layers.py |  8 ++-
 python/paddle/v2/layer.py                   | 61 +++++++++++++++----
 python/paddle/v2/tests/test_layer.py        | 13 ++--
 3 files changed, 62 insertions(+), 20 deletions(-)

diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 00aef80691..4e200517fc 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -3042,7 +3042,8 @@ def recurrent_group(step,
                     reverse=False,
                     name=None,
                     targetInlink=None,
-                    is_generating=False):
+                    is_generating=False,
+                    in_args_converter=None):
     """
     Recurrent layer group is an extremely flexible recurrent unit in
     PaddlePaddle.
As long as the user defines the calculation done within a @@ -3185,7 +3186,10 @@ def recurrent_group(step, assert (is_generating != has_LayerOutput) - layer_outs = step(*in_args) + if in_args_converter is None: + layer_outs = step(*in_args) + else: + layer_outs = step(*in_args_converter(*in_args)).to_proto(dict()) if isinstance(layer_outs, LayerOutput): layer_outs = [layer_outs] diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 5ecc96c685..44c7661b24 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -73,8 +73,6 @@ from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ from paddle.trainer_config_helpers.default_decorators import wrap_name_default -import activation -import attr import data_type __all__ = [ @@ -101,11 +99,10 @@ def parse_network(*outputs): class Layer(object): - def __init__(self, name, parent_layers, step_input=None): + def __init__(self, name, parent_layers): assert isinstance(parent_layers, dict) assert isinstance(name, basestring) self.name = name - self.step_input = step_input self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -121,12 +118,13 @@ class Layer(object): else: v1_layer = map(lambda x: x.to_proto(context=context), self.__parent_layers__[layer_name]) - if layer_name == "input" and self.step_input is not None: - v1_layer.insert(0, self.step_input) kwargs[layer_name] = v1_layer + if self.name is None: + return self.to_proto_impl(**kwargs) + # memory may have the same name with some layer - if isinstance(self, MemoryV2): + if isinstance(self, MemoryV2) or isinstance(self, LayerOutputV2): return self.to_proto_impl(**kwargs) if self.name not in context: @@ -144,7 +142,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names): wrapper = None class V2LayerImpl(Layer): - def __init__(self, name=None, step_input=None, **kwargs): + def __init__(self, name=None, **kwargs): parent_layers = dict() other_kwargs = dict() for pname in parent_names: @@ -155,7 +153,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names): if key not in parent_names: other_kwargs[key] = kwargs[key] - super(V2LayerImpl, self).__init__(name, parent_layers, step_input) + super(V2LayerImpl, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs if wrapper is not None: @@ -214,6 +212,48 @@ class MemoryV2(Layer): return conf_helps.memory(name=self.name, size=self.size, **args) +class LayerOutputV2(Layer): + def __init__(self, layer_output): + assert isinstance(layer_output, conf_helps.LayerOutput) + self.layer_output = layer_output + super(LayerOutputV2, self).__init__( + name=layer_output.name, parent_layers=dict()) + + def to_proto_impl(self): + return self.layer_output + + +class RecurrentGroupV2(Layer): + def __init__(self, name, **kwargs): + self.__parent_names__ = ['input'] + other_kwargs = dict() + parent_layers = dict() + for pname in self.__parent_names__: + if kwargs.has_key(pname): + parent_layers[pname] = kwargs[pname] + for key in kwargs.keys(): + if key not in self.__parent_names__: + other_kwargs[key] = kwargs[key] + self.__kwargs__ = other_kwargs + + super(RecurrentGroupV2, self).__init__( + name=name, parent_layers=parent_layers) + + def to_proto_impl(self, **kwargs): + def in_args_converter(in_args): + if not isinstance(in_args, collections.Sequence): + in_args = [in_args] + return [LayerOutputV2(input) for input in in_args] + + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__kwargs__: + args[each] = 
self.__kwargs__[each] + return conf_helps.recurrent_group( + name=self.name, in_args_converter=in_args_converter, **args) + + data = DataLayerV2 fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input']) max_id = __convert_to_v2__( @@ -234,8 +274,7 @@ embedding = __convert_to_v2__( 'embedding_layer', name_prefix='embedding', parent_names=['input']) last_seq = __convert_to_v2__( 'last_seq', name_prefix='last_seq', parent_names=['input']) -recurrent_group = __convert_to_v2__( - 'recurrent_group', name_prefix='recurrent_layer', parent_names=['input']) +recurrent_group = RecurrentGroupV2 memory = MemoryV2 cross_entropy_with_selfnorm_cost = __convert_to_v2__( diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index 73d769a358..04c0fc7cb0 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -63,7 +63,7 @@ class RNNTest(unittest.TestCase): word_dim = 8 hidden_dim = 8 - def test_old_rnn(): + def parse_old_rnn(): def step(y): mem = conf_helps.memory(name="rnn_state", size=hidden_dim) out = conf_helps.fc_layer( @@ -81,16 +81,15 @@ class RNNTest(unittest.TestCase): return str(parse_network(test)) - def test_new_rnn(): + def parse_new_rnn(): def new_step(y): mem = layer.memory(name="rnn_state", size=hidden_dim) - out = layer.fc(input=[mem], - step_input=y, + out = layer.fc(input=[y, mem], size=hidden_dim, act=activation.Tanh(), bias_attr=True, name="rnn_state") - return out.to_proto(dict()) + return out data1 = layer.data( name="word", type=data_type.integer_value(dict_dim)) @@ -99,8 +98,8 @@ class RNNTest(unittest.TestCase): name="rnn", step=new_step, input=embd) return str(layer.parse_network(rnn_layer)) - diff = difflib.unified_diff(test_old_rnn().splitlines(1), - test_new_rnn().splitlines(1)) + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) print ''.join(diff) From 632ad5c9e25c906b0189be308ecf22c2409abb2c Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 27 Feb 2017 19:59:38 +0800 Subject: [PATCH 08/31] support sequence_rnn_multi_input --- demo/mnist/api_train_v2.py | 3 - python/paddle/trainer/config_parser.py | 6 +- python/paddle/v2/layer.py | 30 ++++- python/paddle/v2/tests/CMakeLists.txt | 6 +- python/paddle/v2/tests/test_layer.py | 50 -------- python/paddle/v2/tests/test_rnn_layer.py | 143 +++++++++++++++++++++++ 6 files changed, 178 insertions(+), 60 deletions(-) create mode 100644 python/paddle/v2/tests/test_rnn_layer.py diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py index 5e66b7399a..6fc01ce58b 100644 --- a/demo/mnist/api_train_v2.py +++ b/demo/mnist/api_train_v2.py @@ -3,9 +3,6 @@ import paddle.v2 as paddle import mnist_util -import pudb -pudb.set_trace() - def train_reader(): train_file = './data/raw_data/train' diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index da937152ee..487d4dfd5b 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -3474,6 +3474,8 @@ def update_g_config(): for name in g_config.model_config.output_layer_names: assert name in g_layer_map, \ 'input name "%s" does not correspond to a layer name' % name + for hook in _parse_config_hooks: + hook() return g_config @@ -3485,8 +3487,8 @@ def parse_config(trainer_config, config_arg_str): passed to config script as a dictionary CONFIG_ARGS ''' init_config_environment() - for hook in _parse_config_hooks: - hook() + # for hook in _parse_config_hooks: + # hook() 
config_args = {} diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 44c7661b24..5328070456 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -124,11 +124,13 @@ class Layer(object): return self.to_proto_impl(**kwargs) # memory may have the same name with some layer - if isinstance(self, MemoryV2) or isinstance(self, LayerOutputV2): + if isinstance(self, MemoryV2): return self.to_proto_impl(**kwargs) + # store v1 API's layer_output in context with the key of it's name. if self.name not in context: context[self.name] = self.to_proto_impl(**kwargs) + return context[self.name] def to_proto_impl(self, **kwargs): @@ -200,8 +202,19 @@ class MemoryV2(Layer): def __init__(self, name, size, **kwargs): self.name = name self.size = size - self.__kwargs__ = kwargs - super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + + parent_names = ['boot_layer'] + parent_layers = dict() + other_kwargs = dict() + for pname in parent_names: + if kwargs.has_key(pname): + parent_layers[pname] = kwargs[pname] + + for key in kwargs.keys(): + if key not in parent_names: + other_kwargs[key] = kwargs[key] + super(MemoryV2, self).__init__(name=name, parent_layers=parent_layers) + self.__kwargs__ = other_kwargs def to_proto_impl(self, **kwargs): args = dict() @@ -209,10 +222,16 @@ class MemoryV2(Layer): args[each] = kwargs[each] for each in self.__kwargs__: args[each] = self.__kwargs__[each] + return conf_helps.memory(name=self.name, size=self.size, **args) class LayerOutputV2(Layer): + """ + LayerOutputV2 is used to store the result of LayerOutput in v1 api. + It will not store it's parents because layer_output has been parsed already. + """ + def __init__(self, layer_output): assert isinstance(layer_output, conf_helps.LayerOutput) self.layer_output = layer_output @@ -239,8 +258,11 @@ class RecurrentGroupV2(Layer): super(RecurrentGroupV2, self).__init__( name=name, parent_layers=parent_layers) + wrapper = wrap_name_default(name_prefix='recurrent_group') + __init__ = wrapper(__init__) + def to_proto_impl(self, **kwargs): - def in_args_converter(in_args): + def in_args_converter(*in_args): if not isinstance(in_args, collections.Sequence): in_args = [in_args] return [LayerOutputV2(input) for input in in_args] diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt index ceb71c1454..bc0f247927 100644 --- a/python/paddle/v2/tests/CMakeLists.txt +++ b/python/paddle/v2/tests/CMakeLists.txt @@ -1,3 +1,7 @@ add_test(NAME test_v2_layer COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py + ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py) + +add_test(NAME test_v2_rnn_layer + COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py) diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index 04c0fc7cb0..41d9683464 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -11,16 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import difflib import unittest -import paddle.trainer_config_helpers as conf_helps import paddle.v2.activation as activation import paddle.v2.attr as attr import paddle.v2.data_type as data_type import paddle.v2.layer as layer -from paddle.trainer_config_helpers.config_parser_utils import \ - parse_network_config as parse_network pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) label = layer.data(name='label', type=data_type.integer_value(10)) @@ -57,51 +53,5 @@ class CostLayerTest(unittest.TestCase): print layer.parse_network(cost7, cost8, cost9, cost10, cost11) -class RNNTest(unittest.TestCase): - def test_simple_rnn(self): - dict_dim = 10 - word_dim = 8 - hidden_dim = 8 - - def parse_old_rnn(): - def step(y): - mem = conf_helps.memory(name="rnn_state", size=hidden_dim) - out = conf_helps.fc_layer( - input=[y, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - def test(): - data1 = conf_helps.data_layer(name="word", size=dict_dim) - embd = conf_helps.embedding_layer(input=data1, size=word_dim) - conf_helps.recurrent_group(name="rnn", step=step, input=embd) - - return str(parse_network(test)) - - def parse_new_rnn(): - def new_step(y): - mem = layer.memory(name="rnn_state", size=hidden_dim) - out = layer.fc(input=[y, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - data1 = layer.data( - name="word", type=data_type.integer_value(dict_dim)) - embd = layer.embedding(input=data1, size=word_dim) - rnn_layer = layer.recurrent_group( - name="rnn", step=new_step, input=embd) - return str(layer.parse_network(rnn_layer)) - - diff = difflib.unified_diff(parse_old_rnn().splitlines(1), - parse_new_rnn().splitlines(1)) - print ''.join(diff) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py new file mode 100644 index 0000000000..bf2c4db61a --- /dev/null +++ b/python/paddle/v2/tests/test_rnn_layer.py @@ -0,0 +1,143 @@ +# Copyright PaddlePaddle contributors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import difflib +import unittest + +import paddle.trainer_config_helpers as conf_helps +import paddle.v2.activation as activation +import paddle.v2.data_type as data_type +import paddle.v2.layer as layer +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as parse_network + + +class RNNTest(unittest.TestCase): + def test_simple_rnn(self): + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + + def parse_old_rnn(): + def step(y): + mem = conf_helps.memory(name="rnn_state", size=hidden_dim) + out = conf_helps.fc_layer( + input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + def test(): + data = conf_helps.data_layer(name="word", size=dict_dim) + embd = conf_helps.embedding_layer(input=data, size=word_dim) + conf_helps.recurrent_group(name="rnn", step=step, input=embd) + + return str(parse_network(test)) + + def parse_new_rnn(): + def new_step(y): + mem = layer.memory(name="rnn_state", size=hidden_dim) + out = layer.fc(input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + data = layer.data( + name="word", type=data_type.integer_value(dict_dim)) + embd = layer.embedding(input=data, size=word_dim) + rnn_layer = layer.recurrent_group( + name="rnn", step=new_step, input=embd) + return str(layer.parse_network(rnn_layer)) + + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) + print ''.join(diff) + + def test_sequence_rnn_multi_input(self): + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + label_dim = 3 + + def parse_old_rnn(): + def step(y, wid): + z = conf_helps.embedding_layer(input=wid, size=word_dim) + mem = conf_helps.memory(name="rnn_state", size=hidden_dim) + out = conf_helps.fc_layer( + input=[y, z, mem], + size=hidden_dim, + act=conf_helps.TanhActivation(), + bias_attr=True, + name="rnn_state") + return out + + def test(): + data = conf_helps.data_layer(name="word", size=dict_dim) + label = conf_helps.data_layer(name="label", size=label_dim) + emb = conf_helps.embedding_layer(input=data, size=word_dim) + out = conf_helps.recurrent_group( + name="rnn", step=step, input=[emb, data]) + + rep = conf_helps.last_seq(input=out) + prob = conf_helps.fc_layer( + size=label_dim, + input=rep, + act=conf_helps.SoftmaxActivation(), + bias_attr=True) + + conf_helps.outputs( + conf_helps.classification_cost( + input=prob, label=label)) + + return str(parse_network(test)) + + def parse_new_rnn(): + def step(y, wid): + z = layer.embedding(input=wid, size=word_dim) + mem = layer.memory(name="rnn_state", size=hidden_dim) + out = layer.fc(input=[y, z, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + data = layer.data( + name="word", type=data_type.dense_vector(dict_dim)) + label = layer.data( + name="label", type=data_type.dense_vector(label_dim)) + emb = layer.embedding(input=data, size=word_dim) + out = layer.recurrent_group( + name="rnn", step=step, input=[emb, data]) + + rep = layer.last_seq(input=out) + prob = layer.fc(size=label_dim, + input=rep, + act=activation.Softmax(), + bias_attr=True) + + cost = layer.classification_cost(input=prob, label=label) + + return str(layer.parse_network(cost)) + + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) + print ''.join(diff) + + +if __name__ == '__main__': + unittest.main() From 876d597495015ba416639af7426258d32587986e Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 
27 Feb 2017 20:41:00 +0800
Subject: [PATCH 09/31] memory may have the same name as some layer

---
 python/paddle/v2/layer.py             | 2 ++
 python/paddle/v2/tests/CMakeLists.txt | 6 +++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index 6dea3afbcf..dbd802bee8 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -139,6 +139,8 @@ class Layer(object):

         if self.name is None:
             return self.to_proto_impl(**kwargs)
+        elif isinstance(self, MemoryV2):
+            return self.to_proto_impl(**kwargs)
         elif self.name not in context:
             context[self.name] = self.to_proto_impl(**kwargs)

diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt
index bc0f247927..b2f43c42de 100644
--- a/python/paddle/v2/tests/CMakeLists.txt
+++ b/python/paddle/v2/tests/CMakeLists.txt
@@ -1,7 +1,11 @@
 add_test(NAME test_v2_layer
     COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
-        ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py)
+        ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py
+    WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)

 add_test(NAME test_v2_rnn_layer
     COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
         ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py)
+
+add_test(NAME test_v2_api
+    COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE})

From b6a0f9a32a4a3e35f9d8ffa4728c69fada5fe5ed Mon Sep 17 00:00:00 2001
From: liaogang
Date: Tue, 28 Feb 2017 18:00:34 +0800
Subject: [PATCH 10/31] Add vgg training via api v2

---
 demo/image_classification/train_v2_vgg.py | 85 +++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 demo/image_classification/train_v2_vgg.py

diff --git a/demo/image_classification/train_v2_vgg.py b/demo/image_classification/train_v2_vgg.py
new file mode 100644
index 0000000000..33b53b27da
--- /dev/null
+++ b/demo/image_classification/train_v2_vgg.py
@@ -0,0 +1,85 @@
+import paddle.v2 as paddle
+
+
+def event_handler(event):
+    if isinstance(event, paddle.event.EndIteration):
+        if event.batch_id % 100 == 0:
+            print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id,
+                                                  event.cost)
+        else:
+            pass
+
+
+def vgg_bn_drop(input):
+    def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
+        return paddle.layer.img_conv_group(
+            input=ipt,
+            num_channels=num_channels,
+            pool_size=2,
+            pool_stride=2,
+            conv_num_filter=[num_filter] * groups,
+            conv_filter_size=3,
+            conv_act=paddle.activation.Relu(),
+            conv_with_batchnorm=True,
+            conv_batchnorm_drop_rate=dropouts,
+            pool_type=pooling.Max())
+
+    conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
+    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
+    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
+    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
+    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
+
+    drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
+    fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
+    bn = paddle.layer.batch_norm(
+        input=fc1,
+        act=paddle.activation.Relu(),
+        layer_attr=ExtraAttr(drop_rate=0.5))
+    fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
+    return fc2
+
+
+def main():
+    datadim = 3 * 32 * 32
+    classdim = 10
+
+    paddle.init(use_gpu=False, trainer_count=1)
+
+    image = paddle.layer.data(
+        name="image", type=paddle.data_type.dense_vector(datadim))
+    # net = vgg_bn_drop(image)
+    out = paddle.layer.fc(input=image,
size=classdim, + act=paddle.activation.Softmax()) + + lbl = paddle.layer.data( + name="label", type=paddle.data_type.integer_value(classdim)) + cost = paddle.layer.classification_cost(input=out, label=lbl) + + parameters = paddle.parameters.create(cost) + momentum_optimizer = paddle.optimizer.Momentum( + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128), + learning_rate=0.1 / 128.0, + learning_rate_decay_a=0.1, + learning_rate_decay_b=50000 * 100, + learning_rate_schedule='discexp', + batch_size=128) + + trainer = paddle.trainer.SGD(update_equation=momentum_optimizer) + trainer.train( + reader=paddle.reader.batched( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(), buf_size=3072), + batch_size=128), + cost=cost, + num_passes=1, + parameters=parameters, + event_handler=event_handler, + reader_dict={'image': 0, + 'label': 1}, ) + + +if __name__ == '__main__': + main() From 7ad8363036af9d8ae91e6698ff09804023602bdf Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 28 Feb 2017 19:44:56 +0800 Subject: [PATCH 11/31] support boot_layer --- .../paddle/trainer_config_helpers/layers.py | 6 ++- python/paddle/v2/layer.py | 37 ++++++++++++++++--- python/paddle/v2/tests/CMakeLists.txt | 1 - 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index a8b536dda4..4f7a2735e2 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -3110,7 +3110,8 @@ def recurrent_group(step, name=None, targetInlink=None, is_generating=False, - in_args_converter=None): + in_args_converter=None, + boot_layer=None): """ Recurrent layer group is an extremely flexible recurrent unit in PaddlePaddle. 
As long as the user defines the calculation done within a @@ -3256,6 +3257,9 @@ def recurrent_group(step, if in_args_converter is None: layer_outs = step(*in_args) else: + # append boot_layer to the last of in_args + if boot_layer is not None: + in_args.append(boot_layer) layer_outs = step(*in_args_converter(*in_args)).to_proto(dict()) if isinstance(layer_outs, LayerOutput): diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 1155eca9c8..542d5a515c 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -140,10 +140,13 @@ class Layer(object): if self.name is None: return self.to_proto_impl(**kwargs) elif isinstance(self, MemoryV2): - return self.to_proto_impl(**kwargs) - elif self.name not in context: - context[self.name] = self.to_proto_impl(**kwargs) + name = self.name + "#__memory__" + if name not in context: + context[name] = self.to_proto_impl(**kwargs) + return context[name] + if self.name not in context: + context[self.name] = self.to_proto_impl(**kwargs) return context[self.name] def to_proto_impl(self, **kwargs): @@ -256,9 +259,32 @@ class LayerOutputV2(Layer): return self.layer_output +class StaticInputV2(Layer): + def __init__(self, **kwargs): + self.__parent_names__ = ['input'] + other_kwargs = dict() + parent_layers = dict() + for pname in self.__parent_names__: + if kwargs.has_key(pname): + parent_layers[pname] = kwargs[pname] + for key in kwargs.keys(): + if key not in self.__parent_names__: + other_kwargs[key] = kwargs[key] + self.__kwargs__ = other_kwargs + super(StaticInputV2, self).__init__(parent_layers=parent_layers) + + def to_proto_impl(self, **kwargs): + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__kwargs__: + args[each] = self.__kwargs__[each] + return conf_helps.StaticInput(**args) + + class RecurrentGroupV2(Layer): def __init__(self, name, **kwargs): - self.__parent_names__ = ['input'] + self.__parent_names__ = ['input', 'boot_layer'] other_kwargs = dict() parent_layers = dict() for pname in self.__parent_names__: @@ -443,7 +469,8 @@ layer_list = [ ['nce', 'nce_layer', ['input', 'label']], ['hsigmoid', 'hsigmoid', ['input', 'label']], # check layers - ['eos', 'eos_layer', ['input']] + ['eos', 'eos_layer', ['input']], + ['gru_step_layer', 'gru_step_layer', ['input', 'output_mem']] ] for l in layer_list: globals()[l[0]] = __convert_to_v2__(l[1], l[2]) diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt index 948cebdf72..572deaff35 100644 --- a/python/paddle/v2/tests/CMakeLists.txt +++ b/python/paddle/v2/tests/CMakeLists.txt @@ -10,7 +10,6 @@ add_test(NAME test_v2_rnn_layer COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py) - add_test(NAME test_topology COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_topology.py From f7ecd312c5a56c48eeafd63fb168f106ad973e66 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 1 Mar 2017 12:49:35 +0800 Subject: [PATCH 12/31] update event handler --- demo/image_classification/train_v2_vgg.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/demo/image_classification/train_v2_vgg.py b/demo/image_classification/train_v2_vgg.py index 33b53b27da..25bfd798eb 100644 --- a/demo/image_classification/train_v2_vgg.py +++ b/demo/image_classification/train_v2_vgg.py @@ -6,8 +6,6 @@ def event_handler(event): if event.batch_id % 100 == 0: print "Pass %d, 
Batch %d, Cost %f" % (event.pass_id, event.batch_id,
                                                  event.cost)
-        else:
-            pass


 def vgg_bn_drop(input):

From d227f4479e5d9b58c45059871c5cd4e221b1a05f Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 1 Mar 2017 15:06:24 +0800
Subject: [PATCH 13/31] Add resnet

---
 demo/image_classification/train_v2_resnet.py | 158 +++++++++++++++++++
 demo/image_classification/train_v2_vgg.py    |  16 +-
 2 files changed, 173 insertions(+), 1 deletion(-)
 create mode 100644 demo/image_classification/train_v2_resnet.py

diff --git a/demo/image_classification/train_v2_resnet.py b/demo/image_classification/train_v2_resnet.py
new file mode 100644
index 0000000000..fdfa87cd87
--- /dev/null
+++ b/demo/image_classification/train_v2_resnet.py
@@ -0,0 +1,158 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.v2 as paddle
+
+
+def event_handler(event):
+    if isinstance(event, paddle.event.EndIteration):
+        if event.batch_id % 100 == 0:
+            print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id,
+                                                  event.cost)
+
+
+def conv_bn_layer(input,
+                  ch_out,
+                  filter_size,
+                  stride,
+                  padding,
+                  active_type=paddle.activation.Relu(),
+                  ch_in=None):
+    tmp = paddle.layer.img_conv(
+        input=input,
+        filter_size=filter_size,
+        num_channels=ch_in,
+        num_filters=ch_out,
+        stride=stride,
+        padding=padding,
+        act=paddle.activation.Linear(),
+        bias_attr=False)
+    return paddle.layer.batch_norm(input=tmp, act=active_type)
+
+
+def shortcut(ipt, n_in, n_out, stride):
+    if n_in != n_out:
+        print("n_in != n_out")
+        return conv_bn_layer(ipt, n_out, 1, stride, 0,
+                             paddle.activation.Linear())
+    else:
+        return ipt
+
+
+def basicblock(ipt, ch_out, stride):
+    ch_in = ipt.num_filters
+    tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
+    tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
+    short = shortcut(ipt, ch_in, ch_out, stride)
+    return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())
+
+
+def bottleneck(ipt, ch_out, stride):
+    ch_in = ipt.num_filters
+    tmp = conv_bn_layer(ipt, ch_out, 1, stride, 0)
+    tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1)
+    tmp = conv_bn_layer(tmp, ch_out * 4, 1, 1, 0, paddle.activation.Linear())
+    short = shortcut(ipt, ch_in, ch_out * 4, stride)
+    return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())
+
+
+def layer_warp(block_func, ipt, features, count, stride):
+    tmp = block_func(ipt, features, stride)
+    for i in range(1, count):
+        tmp = block_func(tmp, features, 1)
+    return tmp
+
+
+def resnet_imagenet(ipt, depth=50):
+    cfg = {
+        18: ([2, 2, 2, 1], basicblock),
+        34: ([3, 4, 6, 3], basicblock),
+        50: ([3, 4, 6, 3], bottleneck),
+        101: ([3, 4, 23, 3], bottleneck),
+        152: ([3, 8, 36, 3], bottleneck)
+    }
+    stages, block_func = cfg[depth]
+    tmp = conv_bn_layer(
+        ipt, ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3)
+    tmp = paddle.layer.img_pool(input=tmp, pool_size=3, stride=2)
+    tmp = layer_warp(block_func, tmp, 64, stages[0], 1)
+    tmp = layer_warp(block_func, tmp, 128, stages[1],
2) + tmp = layer_warp(block_func, tmp, 256, stages[2], 2) + tmp = layer_warp(block_func, tmp, 512, stages[3], 2) + tmp = paddle.layer.img_pool( + input=tmp, pool_size=7, stride=1, pool_type=paddle.pooling.Avg()) + + tmp = paddle.layer.fc(input=tmp, size=1000, act=paddle.activation.Softmax()) + return tmp + + +def resnet_cifar10(ipt, depth=32): + # depth should be one of 20, 32, 44, 56, 110, 1202 + assert (depth - 2) % 6 == 0 + n = (depth - 2) / 6 + nStages = {16, 64, 128} + conv1 = conv_bn_layer( + ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, n, 1) + res2 = layer_warp(basicblock, res1, 32, n, 2) + res3 = layer_warp(basicblock, res2, 64, n, 2) + pool = paddle.layer.img_pool( + input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) + return pool + + +def main(): + datadim = 3 * 32 * 32 + classdim = 10 + + paddle.init(use_gpu=False, trainer_count=1) + + image = paddle.layer.data( + name="image", type=paddle.data_type.dense_vector(datadim)) + net = resnet_cifar10(image, depth=32) + out = paddle.layer.fc(input=net, + size=classdim, + act=paddle.activation.Softmax()) + + lbl = paddle.layer.data( + name="label", type=paddle.data_type.integer_value(classdim)) + cost = paddle.layer.classification_cost(input=out, label=lbl) + + parameters = paddle.parameters.create(cost) + + momentum_optimizer = paddle.optimizer.Momentum( + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128), + learning_rate=0.1 / 128.0, + learning_rate_decay_a=0.1, + learning_rate_decay_b=50000 * 100, + learning_rate_schedule='discexp', + batch_size=128) + + trainer = paddle.trainer.SGD(update_equation=momentum_optimizer) + trainer.train( + reader=paddle.reader.batched( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(), buf_size=3072), + batch_size=128), + cost=cost, + num_passes=1, + parameters=parameters, + event_handler=event_handler, + reader_dict={'image': 0, + 'label': 1}, ) + + +if __name__ == '__main__': + main() diff --git a/demo/image_classification/train_v2_vgg.py b/demo/image_classification/train_v2_vgg.py index 25bfd798eb..5656ac85c6 100644 --- a/demo/image_classification/train_v2_vgg.py +++ b/demo/image_classification/train_v2_vgg.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+

 import paddle.v2 as paddle


 def event_handler(event):
@@ -20,7 +34,7 @@ def vgg_bn_drop(input):
             conv_act=paddle.activation.Relu(),
             conv_with_batchnorm=True,
             conv_batchnorm_drop_rate=dropouts,
-            pool_type=pooling.Max())
+            pool_type=paddle.pooling.Max())

     conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
     conv2 = conv_block(conv1, 128, 2, [0.4, 0])
     conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])

From 73af1942c8fcf89fffa0a13b7d8fdc6cdcdcb815 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Wed, 1 Mar 2017 15:54:46 +0800
Subject: [PATCH 14/31] add the implementation of rnn by yuyang

---
 .../paddle/trainer_config_helpers/layers.py |   2 +-
 python/paddle/v2/layer.py                   | 151 +++++++++++-------
 2 files changed, 97 insertions(+), 56 deletions(-)

diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 250878cbe1..dcb39784a5 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -822,7 +822,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None):
     return LayerOutput(name, LayerType.DATA, size=size)


-@wrap_name_default("embedding")
+@wrap_name_default("embedding_layer")
 @wrap_param_attr_default()
 @layer_support(ERROR_CLIPPING)
 def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None):

diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index 4f6c71664a..71d0e54c0a 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -76,6 +76,9 @@ from paddle.trainer_config_helpers.config_parser_utils import \
     wrap_bias_attr_default
 from paddle.trainer_config_helpers.default_decorators import wrap_name_default
 from paddle.trainer_config_helpers.layers import layer_support
+from paddle.trainer.config_parser import \
+    RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \
+    RecurrentLayerGroupEnd, model_type

 import activation
 import data_type
@@ -126,21 +129,28 @@ class Layer(object):
                     self.__parent_layers__[layer_name])
             kwargs[layer_name] = v1_layer

-        if self.name is None:
+        if self.context_name() is None:
             return self.to_proto_impl(**kwargs)
         elif isinstance(self, MemoryV2):
             name = self.name + "#__memory__"
             if name not in context:
                 context[name] = self.to_proto_impl(**kwargs)
             return context[name]
-
-        if self.name not in context:
-            context[self.name] = self.to_proto_impl(**kwargs)
+        elif self.context_name() not in context:
+            context[self.context_name()] = self.to_proto_impl(**kwargs)
         return context[self.name]

     def to_proto_impl(self, **kwargs):
         raise NotImplementedError()

+    def context_name(self):
+        """
+        Context name means the context which stores the `to_proto_impl`
+        result. If multiple layers share the same context_name, their
+        `to_proto_impl` will be invoked only once.
+ """ + return self.name + def __convert_to_v2__(method_name, parent_names, is_default_name=True): if is_default_name: @@ -231,6 +241,9 @@ class MemoryV2(Layer): return conf_helps.memory(name=self.name, size=self.size, **args) + def context_name(self): + return self.name + "#memory" + class LayerOutputV2(Layer): """ @@ -249,60 +262,20 @@ class LayerOutputV2(Layer): class StaticInputV2(Layer): - def __init__(self, **kwargs): - self.__parent_names__ = ['input'] - other_kwargs = dict() - parent_layers = dict() - for pname in self.__parent_names__: - if kwargs.has_key(pname): - parent_layers[pname] = kwargs[pname] - for key in kwargs.keys(): - if key not in self.__parent_names__: - other_kwargs[key] = kwargs[key] - self.__kwargs__ = other_kwargs - super(StaticInputV2, self).__init__(parent_layers=parent_layers) - - def to_proto_impl(self, **kwargs): - args = dict() - for each in kwargs: - args[each] = kwargs[each] - for each in self.__kwargs__: - args[each] = self.__kwargs__[each] - return conf_helps.StaticInput(**args) - - -class RecurrentGroupV2(Layer): - def __init__(self, name, **kwargs): - self.__parent_names__ = ['input', 'boot_layer'] - other_kwargs = dict() - parent_layers = dict() - for pname in self.__parent_names__: - if kwargs.has_key(pname): - parent_layers[pname] = kwargs[pname] - for key in kwargs.keys(): - if key not in self.__parent_names__: - other_kwargs[key] = kwargs[key] - self.__kwargs__ = other_kwargs - - super(RecurrentGroupV2, self).__init__( - name=name, parent_layers=parent_layers) + def __init__(self, input=None, **kwargs): + assert input is not None + self.__kwargs__ = kwargs + super(StaticInputV2, self).__init__( + name=input.name, parent_layers={'input': input}) - wrapper = wrap_name_default(name_prefix='recurrent_group') - __init__ = wrapper(__init__) + def context_name(self): + return self.name + "#static_input" def to_proto_impl(self, **kwargs): - def in_args_converter(*in_args): - if not isinstance(in_args, collections.Sequence): - in_args = [in_args] - return [LayerOutputV2(input) for input in in_args] - args = dict() - for each in kwargs: - args[each] = kwargs[each] - for each in self.__kwargs__: - args[each] = self.__kwargs__[each] - return conf_helps.recurrent_group( - name=self.name, in_args_converter=in_args_converter, **args) + args.update(kwargs) + args.update(self.__kwargs__) + return conf_helps.StaticInput(**args) class MixedLayerV2(Layer): @@ -377,11 +350,79 @@ def mixed(size=0, return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) +class RecurrentLayerInput(Layer): + def __init__(self, recurrent_name, index, parent_layers): + assert len(parent_layers) == 1 + self.__parents__ = parent_layers.values()[0] + print self.__parents__, parent_layers + super(RecurrentLayerInput, self).__init__( + name=self.__parents__[index].name, parent_layers=parent_layers) + self.__recurrent_name__ = recurrent_name + + def context_name(self): + return self.__recurrent_name__ + ".begin" + + def to_proto_impl(self, **kwargs): + model_type('recurrent_nn') + RecurrentLayerGroupWithoutOutLinksBegin( + name=self.__recurrent_name__, + in_links=map(lambda x: x.name, self.__parents__)) + return self + + +class RecurrentLayerOutput(Layer): + def __init__(self, recurrent_name, index, parent_layers): + assert len(parent_layers) == 1 + self.__parents__ = parent_layers.values()[0] + super(RecurrentLayerOutput, self).__init__( + name=self.__parents__[index].name, parent_layers=parent_layers) + self.__recurrent_name__ = recurrent_name + + def context_name(self): + return 
self.__recurrent_name__ + ".end"
+
+    def to_proto_impl(self, **kwargs):
+        for l in self.__parents__:
+            RecurrentLayerGroupSetOutLink(l.name)
+        RecurrentLayerGroupEnd(name=self.__recurrent_name__)
+
+@wrap_name_default()
+def recurrent_group(step, input, name=None):
+    if not isinstance(input, collections.Sequence):
+        input = [input]
+
+    actual_input = [
+        RecurrentLayerInput(
+            recurrent_name=name,
+            index=i,
+            parent_layers={'recurrent_inputs': input})
+        for i in xrange(len(input))
+    ]
+
+    actual_output = step(*actual_input)
+
+    if not isinstance(actual_output, collections.Sequence):
+        actual_output = [actual_output]
+
+    retv = [
+        RecurrentLayerOutput(
+            recurrent_name=name,
+            index=i,
+            parent_layers={'recurrent_outputs': actual_output})
+        for i in xrange(len(actual_output))
+    ]
+    if len(retv) == 1:
+        return retv[0]
+    else:
+        return retv
+
+
 LayerV2 = Layer
 data = DataLayerV2
 AggregateLevel = conf_helps.layers.AggregateLevel
 ExpandLevel = conf_helps.layers.ExpandLevel
-recurrent_group = RecurrentGroupV2
+recurrent_group = recurrent_group
 memory = MemoryV2

From fd3be087d603bc1ea399769b79c1b0f9e1758161 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Wed, 1 Mar 2017 16:01:42 +0800
Subject: [PATCH 15/31] restore recurrent_group in v1

---
 python/paddle/trainer_config_helpers/layers.py | 12 ++----------
 python/paddle/v2/layer.py                      |  5 -----
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index dcb39784a5..2b95c2ed0f 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -3115,9 +3115,7 @@ def recurrent_group(step,
                     reverse=False,
                     name=None,
                     targetInlink=None,
-                    is_generating=False,
-                    in_args_converter=None,
-                    boot_layer=None):
+                    is_generating=False):
     """
     Recurrent layer group is an extremely flexible recurrent unit in PaddlePaddle.
As long as the user defines the calculation done within a @@ -3260,13 +3258,7 @@ def recurrent_group(step, assert (is_generating != has_LayerOutput) - if in_args_converter is None: - layer_outs = step(*in_args) - else: - # append boot_layer to the last of in_args - if boot_layer is not None: - in_args.append(boot_layer) - layer_outs = step(*in_args_converter(*in_args)).to_proto(dict()) + layer_outs = step(*in_args) if isinstance(layer_outs, LayerOutput): layer_outs = [layer_outs] diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 71d0e54c0a..f1ca0b46eb 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -131,11 +131,6 @@ class Layer(object): if self.context_name() is None: return self.to_proto_impl(**kwargs) - elif isinstance(self, MemoryV2): - name = self.name + "#__memory__" - if name not in context: - context[name] = self.to_proto_impl(**kwargs) - return context[name] elif self.context_name() not in context: context[self.context_name()] = self.to_proto_impl(**kwargs) return context[self.name] From 6b199367e0339119a699292ff488976bdb22554f Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 1 Mar 2017 16:27:55 +0800 Subject: [PATCH 16/31] handle memory layer --- python/paddle/v2/layer.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index f1ca0b46eb..bdb0c29a47 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -133,7 +133,11 @@ class Layer(object): return self.to_proto_impl(**kwargs) elif self.context_name() not in context: context[self.context_name()] = self.to_proto_impl(**kwargs) - return context[self.name] + + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] def to_proto_impl(self, **kwargs): raise NotImplementedError() @@ -146,6 +150,9 @@ class Layer(object): """ return self.name + def use_context_name(self): + return False + def __convert_to_v2__(method_name, parent_names, is_default_name=True): if is_default_name: @@ -239,6 +246,13 @@ class MemoryV2(Layer): def context_name(self): return self.name + "#memory" + def use_context_name(self): + """ + memory layer will have the same name with some layer + :return: + """ + return True + class LayerOutputV2(Layer): """ From ad44a3ebcaa062342ec799f020bd3975e6b5f899 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 1 Mar 2017 20:14:43 +0800 Subject: [PATCH 17/31] Update vgg and resnet via api v2 --- demo/image_classification/api_v2_resnet.py | 74 ++++++++ .../{train_v2_vgg.py => api_v2_train.py} | 59 ++----- demo/image_classification/api_v2_vgg.py | 47 ++++++ demo/image_classification/train_v2_resnet.py | 158 ------------------ 4 files changed, 139 insertions(+), 199 deletions(-) create mode 100644 demo/image_classification/api_v2_resnet.py rename demo/image_classification/{train_v2_vgg.py => api_v2_train.py} (55%) create mode 100644 demo/image_classification/api_v2_vgg.py delete mode 100644 demo/image_classification/train_v2_resnet.py diff --git a/demo/image_classification/api_v2_resnet.py b/demo/image_classification/api_v2_resnet.py new file mode 100644 index 0000000000..19d2054078 --- /dev/null +++ b/demo/image_classification/api_v2_resnet.py @@ -0,0 +1,74 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.v2 as paddle + +__all__ = ['resnet_cifar10'] + + +def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + active_type=paddle.activation.Relu(), + ch_in=None): + tmp = paddle.layer.img_conv( + input=input, + filter_size=filter_size, + num_channels=ch_in, + num_filters=ch_out, + stride=stride, + padding=padding, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.batch_norm(input=tmp, act=active_type) + + +def shortcut(ipt, n_in, n_out, stride): + if n_in != n_out: + return conv_bn_layer(ipt, n_out, 1, stride, 0, + paddle.activation.Linear()) + else: + return ipt + + +def basicblock(ipt, ch_out, stride): + ch_in = ch_out * 2 + tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear()) + short = shortcut(ipt, ch_in, ch_out, stride) + return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) + + +def layer_warp(block_func, ipt, features, count, stride): + tmp = block_func(ipt, features, stride) + for i in range(1, count): + tmp = block_func(tmp, features, 1) + return tmp + + +def resnet_cifar10(ipt, depth=32): + # depth should be one of 20, 32, 44, 56, 110, 1202 + assert (depth - 2) % 6 == 0 + n = (depth - 2) / 6 + nStages = {16, 64, 128} + conv1 = conv_bn_layer( + ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, n, 1) + res2 = layer_warp(basicblock, res1, 32, n, 2) + res3 = layer_warp(basicblock, res2, 64, n, 2) + pool = paddle.layer.img_pool( + input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) + return pool diff --git a/demo/image_classification/train_v2_vgg.py b/demo/image_classification/api_v2_train.py similarity index 55% rename from demo/image_classification/train_v2_vgg.py rename to demo/image_classification/api_v2_train.py index 5656ac85c6..44a8db3941 100644 --- a/demo/image_classification/train_v2_vgg.py +++ b/demo/image_classification/api_v2_train.py @@ -10,9 +10,10 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. 
+# limitations under the License -import paddle.v2 as paddle +from api_v2_vgg import resnet_cifar10 +from api_v2_resnet import vgg_bn_drop def event_handler(event): @@ -22,46 +23,21 @@ def event_handler(event): event.cost) -def vgg_bn_drop(input): - def conv_block(ipt, num_filter, groups, dropouts, num_channels=None): - return paddle.layer.img_conv_group( - input=ipt, - num_channels=num_channels, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act=paddle.activation.Relu(), - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type=paddle.pooling.Max()) - - conv1 = conv_block(input, 64, 2, [0.3, 0], 3) - conv2 = conv_block(conv1, 128, 2, [0.4, 0]) - conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) - conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) - conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) - - drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5) - fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear()) - bn = paddle.layer.batch_norm( - input=fc1, - act=paddle.activation.Relu(), - layer_attr=ExtraAttr(drop_rate=0.5)) - fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear()) - return fc2 - - def main(): datadim = 3 * 32 * 32 classdim = 10 - paddle.init(use_gpu=False, trainer_count=1) + paddle.init(use_gpu=True, trainer_count=1) image = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(datadim)) + + # option 1. resnet + net = resnet_cifar10(image, depth=32) + # option 2. vgg # net = vgg_bn_drop(image) - out = paddle.layer.fc(input=image, + + out = paddle.layer.fc(input=net, size=classdim, act=paddle.activation.Softmax()) @@ -70,27 +46,28 @@ def main(): cost = paddle.layer.classification_cost(input=out, label=lbl) parameters = paddle.parameters.create(cost) + momentum_optimizer = paddle.optimizer.Momentum( momentum=0.9, - regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128), + regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128), learning_rate=0.1 / 128.0, learning_rate_decay_a=0.1, learning_rate_decay_b=50000 * 100, learning_rate_schedule='discexp', batch_size=128) - trainer = paddle.trainer.SGD(update_equation=momentum_optimizer) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=momentum_optimizer) trainer.train( reader=paddle.reader.batched( paddle.reader.shuffle( - paddle.dataset.cifar.train10(), buf_size=3072), + paddle.dataset.cifar.train10(), buf_size=50000), batch_size=128), - cost=cost, - num_passes=1, - parameters=parameters, + num_passes=5, event_handler=event_handler, reader_dict={'image': 0, - 'label': 1}, ) + 'label': 1}) if __name__ == '__main__': diff --git a/demo/image_classification/api_v2_vgg.py b/demo/image_classification/api_v2_vgg.py new file mode 100644 index 0000000000..1e0e6b93ad --- /dev/null +++ b/demo/image_classification/api_v2_vgg.py @@ -0,0 +1,47 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
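+
+# A usage sketch for the network defined below (this mirrors
+# api_v2_train.py from this series; `datadim` stands for the CIFAR-10
+# input size, 3 * 32 * 32):
+#
+#     image = paddle.layer.data(
+#         name="image", type=paddle.data_type.dense_vector(datadim))
+#     net = vgg_bn_drop(image)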
+
+import paddle.v2 as paddle
+
+__all__ = ['vgg_bn_drop']
+
+
+def vgg_bn_drop(input):
+    def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
+        return paddle.networks.img_conv_group(
+            input=ipt,
+            num_channels=num_channels,
+            pool_size=2,
+            pool_stride=2,
+            conv_num_filter=[num_filter] * groups,
+            conv_filter_size=3,
+            conv_act=paddle.activation.Relu(),
+            conv_with_batchnorm=True,
+            conv_batchnorm_drop_rate=dropouts,
+            pool_type=paddle.pooling.Max())
+
+    conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
+    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
+    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
+    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
+    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
+
+    drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
+    fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
+    bn = paddle.layer.batch_norm(
+        input=fc1,
+        act=paddle.activation.Relu(),
+        layer_attr=paddle.attr.Extra(drop_rate=0.5))
+    fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
+    return fc2
diff --git a/demo/image_classification/train_v2_resnet.py b/demo/image_classification/train_v2_resnet.py
deleted file mode 100644
index fdfa87cd87..0000000000
--- a/demo/image_classification/train_v2_resnet.py
+++ /dev/null
@@ -1,158 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import paddle.v2 as paddle - - -def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id, - event.cost) - - -def conv_bn_layer(input, - ch_out, - filter_size, - stride, - padding, - active_type=paddle.activation.Relu(), - ch_in=None): - tmp = paddle.layer.img_conv( - input=input, - filter_size=filter_size, - num_channels=ch_in, - num_filters=ch_out, - stride=stride, - padding=padding, - act=paddle.activation.Linear(), - bias_attr=False) - return paddle.layer.batch_norm(input=tmp, act=active_type) - - -def shortcut(ipt, n_in, n_out, stride): - if n_in != n_out: - print("n_in != n_out") - return conv_bn_layer(ipt, n_out, 1, stride, 0, - paddle.activation.Linear()) - else: - return ipt - - -def basicblock(ipt, ch_out, stride): - ch_in = ipt.num_filters - tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1) - tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear()) - short = shortcut(ipt, ch_in, ch_out, stride) - return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) - - -def bottleneck(ipt, ch_out, stride): - ch_in = ipt.num_filter - tmp = conv_bn_layer(ipt, ch_out, 1, stride, 0) - tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1) - tmp = conv_bn_layer(tmp, ch_out * 4, 1, 1, 0, paddle.activation.Linear()) - short = shortcut(ipt, ch_in, ch_out * 4, stride) - return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) - - -def layer_warp(block_func, ipt, features, count, stride): - tmp = block_func(ipt, features, stride) - for i in range(1, count): - tmp = block_func(tmp, features, 1) - return tmp - - -def resnet_imagenet(ipt, depth=50): - cfg = { - 18: ([2, 2, 2, 1], basicblock), - 34: ([3, 4, 6, 3], basicblock), - 50: ([3, 4, 6, 3], bottleneck), - 101: ([3, 4, 23, 3], bottleneck), - 152: ([3, 8, 36, 3], bottleneck) - } - stages, block_func = cfg[depth] - tmp = conv_bn_layer( - ipt, ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3) - tmp = paddle.layer.img_pool(input=tmp, pool_size=3, stride=2) - tmp = layer_warp(block_func, tmp, 64, stages[0], 1) - tmp = layer_warp(block_func, tmp, 128, stages[1], 2) - tmp = layer_warp(block_func, tmp, 256, stages[2], 2) - tmp = layer_warp(block_func, tmp, 512, stages[3], 2) - tmp = paddle.layer.img_pool( - input=tmp, pool_size=7, stride=1, pool_type=paddle.pooling.Avg()) - - tmp = paddle.layer.fc(input=tmp, size=1000, act=paddle.activation.Softmax()) - return tmp - - -def resnet_cifar10(ipt, depth=32): - # depth should be one of 20, 32, 44, 56, 110, 1202 - assert (depth - 2) % 6 == 0 - n = (depth - 2) / 6 - nStages = {16, 64, 128} - conv1 = conv_bn_layer( - ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) - res1 = layer_warp(basicblock, conv1, 16, n, 1) - res2 = layer_warp(basicblock, res1, 32, n, 2) - res3 = layer_warp(basicblock, res2, 64, n, 2) - pool = paddle.layer.img_pool( - input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) - return pool - - -def main(): - datadim = 3 * 32 * 32 - classdim = 10 - - paddle.init(use_gpu=False, trainer_count=1) - - image = paddle.layer.data( - name="image", type=paddle.data_type.dense_vector(datadim)) - net = resnet_cifar10(image, depth=32) - out = paddle.layer.fc(input=net, - size=classdim, - act=paddle.activation.Softmax()) - - lbl = paddle.layer.data( - name="label", type=paddle.data_type.integer_value(classdim)) - cost = paddle.layer.classification_cost(input=out, label=lbl) - - parameters = paddle.parameters.create(cost) - - 
momentum_optimizer = paddle.optimizer.Momentum(
-        momentum=0.9,
-        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
-        learning_rate=0.1 / 128.0,
-        learning_rate_decay_a=0.1,
-        learning_rate_decay_b=50000 * 100,
-        learning_rate_schedule='discexp',
-        batch_size=128)
-
-    trainer = paddle.trainer.SGD(update_equation=momentum_optimizer)
-    trainer.train(
-        reader=paddle.reader.batched(
-            paddle.reader.shuffle(
-                paddle.dataset.cifar.train10(), buf_size=3072),
-            batch_size=128),
-        cost=cost,
-        num_passes=1,
-        parameters=parameters,
-        event_handler=event_handler,
-        reader_dict={'image': 0,
-                     'label': 1}, )
-
-
-if __name__ == '__main__':
-    main()

From 49020f0be80428ba22913062ae877605114134eb Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 1 Mar 2017 20:26:42 +0800
Subject: [PATCH 18/31] import paddle.v2

---
 demo/image_classification/api_v2_train.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py
index 44a8db3941..e6e4307242 100644
--- a/demo/image_classification/api_v2_train.py
+++ b/demo/image_classification/api_v2_train.py
@@ -14,6 +14,7 @@
 
 from api_v2_vgg import resnet_cifar10
 from api_v2_resnet import vgg_bn_drop
+import paddle.v2 as paddle
 
 
 def event_handler(event):

From 5fc572c29459faf0fbc342e3582ec8b6ee6f02ac Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Wed, 1 Mar 2017 20:28:00 +0800
Subject: [PATCH 19/31] Complete Memory

---
 python/paddle/trainer/config_parser.py   |  6 +-
 python/paddle/v2/layer.py                | 99 ++++++++++++++++++------
 python/paddle/v2/tests/test_rnn_layer.py | 27 ++++---
 3 files changed, 96 insertions(+), 36 deletions(-)

diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 487d4dfd5b..da937152ee 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -3474,8 +3474,6 @@ def update_g_config():
     for name in g_config.model_config.output_layer_names:
         assert name in g_layer_map, \
             'input name "%s" does not correspond to a layer name' % name
-    for hook in _parse_config_hooks:
-        hook()
     return g_config
 
 
@@ -3487,8 +3485,8 @@ def parse_config(trainer_config, config_arg_str):
         passed to config script as a dictionary CONFIG_ARGS
     '''
     init_config_environment()
-    # for hook in _parse_config_hooks:
-    #     hook()
+    for hook in _parse_config_hooks:
+        hook()
 
     config_args = {}
 
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index bdb0c29a47..bf5d653e8a 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -67,7 +67,7 @@
 paddle.v2.parameters.create, no longer exposed to users.
""" import collections - +import inspect import paddle.trainer_config_helpers as conf_helps from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ @@ -216,31 +216,83 @@ class DataLayerV2(Layer): return getattr(conf_helps, self.__method_name__)(name=self.name, **args) -class MemoryV2(Layer): - def __init__(self, name, size, **kwargs): - self.name = name - self.size = size +class WithExtraParent(Layer): + def extra_parent(self): + return self.__extra_parent__ - parent_names = ['boot_layer'] - parent_layers = dict() - other_kwargs = dict() - for pname in parent_names: - if kwargs.has_key(pname): - parent_layers[pname] = kwargs[pname] + def __init__(self, name=None, parent_layers=None): + self.__extra_parent__ = [] + super(WithExtraParent, self).__init__(name, parent_layers) - for key in kwargs.keys(): - if key not in parent_names: - other_kwargs[key] = kwargs[key] - super(MemoryV2, self).__init__(name=name, parent_layers=parent_layers) - self.__kwargs__ = other_kwargs + def append_extra_parent(self, parent): + self.__extra_parent__.append(parent) - def to_proto_impl(self, **kwargs): + def to_proto(self, context): + """ + function to set proto attribute + """ + kwargs = dict() + for p in self.__extra_parent__: + p.to_proto(context=context) + + for layer_name in self.__parent_layers__: + if not isinstance(self.__parent_layers__[layer_name], + collections.Sequence): + v1_layer = self.__parent_layers__[layer_name].to_proto( + context=context) + else: + v1_layer = map(lambda x: x.to_proto(context=context), + self.__parent_layers__[layer_name]) + kwargs[layer_name] = v1_layer + + if self.context_name() is None: + return self.to_proto_impl(context=context, **kwargs) + elif self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl( + context=context, **kwargs) + + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] + + +class MemoryV2(WithExtraParent): + def __init__(self, name, size, **kwargs): + self.name = name + self.size = size + super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + self.__kwargs__ = kwargs + self.__boot_layer_name__ = None + if 'boot_layer' in kwargs: + begin_of_current_rnn = [] + # TODO(yuyang18): Fix inspect, it could be wrong when user invoke a + # function inside step. 
+ st = inspect.stack() + for i in xrange(len(st)): + locs = inspect.stack()[i][0].f_locals + for val in locs.viewvalues(): + if isinstance(val, RecurrentLayerInput): + begin_of_current_rnn.append(val) + + if begin_of_current_rnn: + break + assert begin_of_current_rnn is not None + for extra in begin_of_current_rnn: + self.append_extra_parent(extra) + assert isinstance(extra, WithExtraParent) + extra.append_extra_parent(kwargs['boot_layer']) + self.__boot_layer_name__ = kwargs['boot_layer'].name + + def to_proto_impl(self, context, **kwargs): args = dict() for each in kwargs: args[each] = kwargs[each] for each in self.__kwargs__: args[each] = self.__kwargs__[each] + if self.__boot_layer_name__ is not None: + args['boot_layer'] = context[self.__boot_layer_name__] return conf_helps.memory(name=self.name, size=self.size, **args) def context_name(self): @@ -328,7 +380,7 @@ class MixedLayerV2(Layer): self.__inputs__.append(other) return self else: - raise MixedLayerTypeV2.AddToSealedMixedLayerExceptionV2() + raise MixedLayerV2.AddToSealedMixedLayerExceptionV2() def __enter__(self): assert len(self.__inputs__) == 0 @@ -359,11 +411,10 @@ def mixed(size=0, return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) -class RecurrentLayerInput(Layer): +class RecurrentLayerInput(WithExtraParent): def __init__(self, recurrent_name, index, parent_layers): assert len(parent_layers) == 1 self.__parents__ = parent_layers.values()[0] - print self.__parents__, parent_layers super(RecurrentLayerInput, self).__init__( name=self.__parents__[index].name, parent_layers=parent_layers) self.__recurrent_name__ = recurrent_name @@ -371,7 +422,7 @@ class RecurrentLayerInput(Layer): def context_name(self): return self.__recurrent_name__ + ".begin" - def to_proto_impl(self, **kwargs): + def to_proto_impl(self, context, **kwargs): model_type('recurrent_nn') RecurrentLayerGroupWithoutOutLinksBegin( name=self.__recurrent_name__, @@ -458,8 +509,10 @@ def __layer_name_mapping__(inname): def __layer_name_mapping_parent_names__(inname): all_args = getattr(conf_helps, inname).argspec.args return filter( - lambda x: x in ['input1', 'input2','label', 'input', 'a', 'b', 'expand_as', - 'weights', 'vectors', 'weight', 'score', 'left', 'right'], + lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b', + 'expand_as', + 'weights', 'vectors', 'weight', 'score', 'left', + 'right'], all_args) diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py index bf2c4db61a..48aeb42391 100644 --- a/python/paddle/v2/tests/test_rnn_layer.py +++ b/python/paddle/v2/tests/test_rnn_layer.py @@ -106,9 +106,21 @@ class RNNTest(unittest.TestCase): return str(parse_network(test)) def parse_new_rnn(): + data = layer.data( + name="word", type=data_type.dense_vector(dict_dim)) + label = layer.data( + name="label", type=data_type.dense_vector(label_dim)) + emb = layer.embedding(input=data, size=word_dim) + + boot_layer = layer.data( + name="boot", type=data_type.dense_vector(10)) + + boot_layer = layer.fc(name='wtf', input=boot_layer, size=10) + def step(y, wid): z = layer.embedding(input=wid, size=word_dim) - mem = layer.memory(name="rnn_state", size=hidden_dim) + mem = layer.memory( + name="rnn_state", size=hidden_dim, boot_layer=boot_layer) out = layer.fc(input=[y, z, mem], size=hidden_dim, act=activation.Tanh(), @@ -116,11 +128,6 @@ class RNNTest(unittest.TestCase): name="rnn_state") return out - data = layer.data( - name="word", type=data_type.dense_vector(dict_dim)) - label = layer.data( - 
name="label", type=data_type.dense_vector(label_dim)) - emb = layer.embedding(input=data, size=word_dim) out = layer.recurrent_group( name="rnn", step=step, input=[emb, data]) @@ -134,9 +141,11 @@ class RNNTest(unittest.TestCase): return str(layer.parse_network(cost)) - diff = difflib.unified_diff(parse_old_rnn().splitlines(1), - parse_new_rnn().splitlines(1)) - print ''.join(diff) + with open("/Users/baidu/old.out", 'w') as f: + print >> f, parse_old_rnn() + with open("/Users/baidu/new.out", "w") as f: + print >> f, parse_new_rnn() + # print ''.join(diff) if __name__ == '__main__': From 35ec5f0f1a5b497c0e927c98df882a1e9ab40d16 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 2 Mar 2017 09:51:46 +0800 Subject: [PATCH 20/31] Support StaticInput --- python/paddle/v2/__init__.py | 3 +- python/paddle/v2/layer.py | 53 +++++++++++++----------- python/paddle/v2/networks.py | 19 +++++++++ python/paddle/v2/tests/test_rnn_layer.py | 41 +++++++++--------- 4 files changed, 72 insertions(+), 44 deletions(-) create mode 100644 python/paddle/v2/networks.py diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index b31efe170d..4dbcd3bb6b 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -20,6 +20,7 @@ import event import data_type import topology import data_feeder +import networks from . import dataset from . import reader import attr @@ -29,7 +30,7 @@ import py_paddle.swig_paddle as api __all__ = [ 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader', - 'topology' + 'topology', 'networks' ] diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index bf5d653e8a..82ccd8498a 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -109,9 +109,10 @@ def parse_network(*outputs): class Layer(object): - def __init__(self, name=None, parent_layers=None): + def __init__(self, name=None, size=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name + self.size = size self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -173,7 +174,8 @@ def __convert_to_v2__(method_name, parent_names, is_default_name=True): other_kwargs[key] = kwargs[key] name = kwargs.get('name', None) - super(V2LayerImpl, self).__init__(name, parent_layers) + size = kwargs.get('size', None) + super(V2LayerImpl, self).__init__(name, size, parent_layers) self.__other_kwargs__ = other_kwargs if wrapper is not None: @@ -220,9 +222,10 @@ class WithExtraParent(Layer): def extra_parent(self): return self.__extra_parent__ - def __init__(self, name=None, parent_layers=None): + def __init__(self, name=None, size=None, parent_layers=None): self.__extra_parent__ = [] - super(WithExtraParent, self).__init__(name, parent_layers) + super(WithExtraParent, self).__init__( + name=name, size=size, parent_layers=parent_layers) def append_extra_parent(self, parent): self.__extra_parent__.append(parent) @@ -261,7 +264,8 @@ class MemoryV2(WithExtraParent): def __init__(self, name, size, **kwargs): self.name = name self.size = size - super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + super(MemoryV2, self).__init__( + name=name, size=size, parent_layers=dict()) self.__kwargs__ = kwargs self.__boot_layer_name__ = None if 'boot_layer' in kwargs: @@ -271,7 +275,9 @@ class MemoryV2(WithExtraParent): st = inspect.stack() for i in xrange(len(st)): locs = inspect.stack()[i][0].f_locals - for val in locs.viewvalues(): + keys = locs.keys() + for 
key in keys: + val = locs[key] if isinstance(val, RecurrentLayerInput): begin_of_current_rnn.append(val) @@ -322,21 +328,15 @@ class LayerOutputV2(Layer): return self.layer_output -class StaticInputV2(Layer): - def __init__(self, input=None, **kwargs): - assert input is not None - self.__kwargs__ = kwargs - super(StaticInputV2, self).__init__( - name=input.name, parent_layers={'input': input}) - - def context_name(self): - return self.name + "#static_input" - - def to_proto_impl(self, **kwargs): - args = dict() - args.update(kwargs) - args.update(self.__kwargs__) - return conf_helps.StaticInput(**args) +class StaticInputV2(object): + def __init__(self, input, is_seq=False, size=None): + assert isinstance(input, LayerV2) + self.name = input.name + self.input = input + self.is_seq = is_seq + self.size = size + # TODO(qiaolongfei): add size + # assert input.size is not None or size is not None class MixedLayerV2(Layer): @@ -370,9 +370,8 @@ class MixedLayerV2(Layer): other_kwargs['act'] = act other_kwargs['bias_attr'] = bias_attr other_kwargs['layer_attr'] = layer_attr - parent_layers = {"input": self.__inputs__} - super(MixedLayerV2, self).__init__(name, parent_layers) + super(MixedLayerV2, self).__init__(name, size, parent_layers) self.__other_kwargs__ = other_kwargs def __iadd__(self, other): @@ -452,6 +451,12 @@ def recurrent_group(step, input, name=None): if not isinstance(input, collections.Sequence): input = [input] + # TODO(qiaolongfei) convert StaticInput to memory according to v2 recurrent_group + for i in xrange(len(input)): + cur_input = input[i] + if isinstance(cur_input, StaticInputV2): + input[i] = cur_input.input + actual_input = [ RecurrentLayerInput( recurrent_name=name, @@ -512,7 +517,7 @@ def __layer_name_mapping_parent_names__(inname): lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b', 'expand_as', 'weights', 'vectors', 'weight', 'score', 'left', - 'right'], + 'right', 'output_mem'], all_args) diff --git a/python/paddle/v2/networks.py b/python/paddle/v2/networks.py new file mode 100644 index 0000000000..2877b56b18 --- /dev/null +++ b/python/paddle/v2/networks.py @@ -0,0 +1,19 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
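+
+# The two wrappers below take the same keyword arguments as the v1
+# networks of the same name in paddle.trainer_config_helpers. A
+# hypothetical call (`emb` and `hidden_dim` are placeholders):
+#
+#     gru = paddle.v2.networks.simple_gru(input=emb, size=hidden_dim)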
+ +from layer import __convert_to_v2__ + +simple_gru = __convert_to_v2__('simple_gru', ['input']) +simple_attention = __convert_to_v2__( + 'simple_attention', ['encoded_sequence', 'encoded_proj', 'decoder_state']) diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py index 48aeb42391..5fbbd20eb7 100644 --- a/python/paddle/v2/tests/test_rnn_layer.py +++ b/python/paddle/v2/tests/test_rnn_layer.py @@ -74,21 +74,28 @@ class RNNTest(unittest.TestCase): label_dim = 3 def parse_old_rnn(): - def step(y, wid): - z = conf_helps.embedding_layer(input=wid, size=word_dim) - mem = conf_helps.memory(name="rnn_state", size=hidden_dim) - out = conf_helps.fc_layer( - input=[y, z, mem], - size=hidden_dim, - act=conf_helps.TanhActivation(), - bias_attr=True, - name="rnn_state") - return out - def test(): data = conf_helps.data_layer(name="word", size=dict_dim) label = conf_helps.data_layer(name="label", size=label_dim) emb = conf_helps.embedding_layer(input=data, size=word_dim) + boot_layer = conf_helps.data_layer(name="boot", size=10) + boot_layer = conf_helps.fc_layer( + name='boot_fc', input=boot_layer, size=10) + + def step(y, wid): + z = conf_helps.embedding_layer(input=wid, size=word_dim) + mem = conf_helps.memory( + name="rnn_state", + size=hidden_dim, + boot_layer=boot_layer) + out = conf_helps.fc_layer( + input=[y, z, mem], + size=hidden_dim, + act=conf_helps.TanhActivation(), + bias_attr=True, + name="rnn_state") + return out + out = conf_helps.recurrent_group( name="rnn", step=step, input=[emb, data]) @@ -111,11 +118,9 @@ class RNNTest(unittest.TestCase): label = layer.data( name="label", type=data_type.dense_vector(label_dim)) emb = layer.embedding(input=data, size=word_dim) - boot_layer = layer.data( name="boot", type=data_type.dense_vector(10)) - - boot_layer = layer.fc(name='wtf', input=boot_layer, size=10) + boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10) def step(y, wid): z = layer.embedding(input=wid, size=word_dim) @@ -141,11 +146,9 @@ class RNNTest(unittest.TestCase): return str(layer.parse_network(cost)) - with open("/Users/baidu/old.out", 'w') as f: - print >> f, parse_old_rnn() - with open("/Users/baidu/new.out", "w") as f: - print >> f, parse_new_rnn() - # print ''.join(diff) + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) + print ''.join(diff) if __name__ == '__main__': From b400c8f02c76ce74828cc999d6bef335cca18a57 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 2 Mar 2017 11:47:33 +0800 Subject: [PATCH 21/31] update to latest --- python/paddle/v2/config_base.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py index 035f96b0f2..be3e39a06e 100644 --- a/python/paddle/v2/config_base.py +++ b/python/paddle/v2/config_base.py @@ -19,9 +19,10 @@ import paddle.trainer_config_helpers as conf_helps class Layer(object): - def __init__(self, name=None, parent_layers=None): + def __init__(self, name=None, size=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name + self.size = size self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -39,16 +40,30 @@ class Layer(object): self.__parent_layers__[layer_name]) kwargs[layer_name] = v1_layer - if self.name is None: + if self.context_name() is None: return self.to_proto_impl(**kwargs) - elif self.name not in context: - context[self.name] = self.to_proto_impl(**kwargs) + elif 
self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl(**kwargs) - return context[self.name] + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] def to_proto_impl(self, **kwargs): raise NotImplementedError() + def context_name(self): + """ + Context name means the context which stores `to_proto_impl` result. + If multiple layer share same context_name, the `to_proto_impl` of them + will be invoked only once. + """ + return self.name + + def use_context_name(self): + return False + def __convert_to_v2__(method_name, parent_names, is_default_name=True): if is_default_name: @@ -69,7 +84,8 @@ def __convert_to_v2__(method_name, parent_names, is_default_name=True): other_kwargs[key] = kwargs[key] name = kwargs.get('name', None) - super(V2LayerImpl, self).__init__(name, parent_layers) + size = kwargs.get('size', None) + super(V2LayerImpl, self).__init__(name, size, parent_layers) self.__other_kwargs__ = other_kwargs if wrapper is not None: From 0dc68a2c90e2432a3b5678881268fa22e1f0d990 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 2 Mar 2017 14:48:51 +0800 Subject: [PATCH 22/31] add getNonStaticParameters --- demo/image_classification/api_v2_train.py | 4 ++-- paddle/api/GradientMachine.cpp | 14 ++++++++++++++ paddle/api/PaddleAPI.h | 3 +++ paddle/py_paddle/util.py | 6 ++++++ 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py index e6e4307242..0b4dc4d929 100644 --- a/demo/image_classification/api_v2_train.py +++ b/demo/image_classification/api_v2_train.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License -from api_v2_vgg import resnet_cifar10 -from api_v2_resnet import vgg_bn_drop +from api_v2_vgg import vgg_bn_drop +from api_v2_resnet import resnet_cifar10 import paddle.v2 as paddle diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp index 538ca2999f..dcb5fe086f 100644 --- a/paddle/api/GradientMachine.cpp +++ b/paddle/api/GradientMachine.cpp @@ -142,6 +142,20 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) { } } +size_t GradientMachine::getNonStaticParameterSize() const { + return m->machine->getNonStaticParameters().size(); +} + +Parameter* GradientMachine::getNonStaticParameter(size_t i) throw(RangeError) { + auto params = m->machine->getNonStaticParameters(); + if (i < params.size()) { + return Parameter::createFromSharedPtr( + &m->machine->getNonStaticParameters()[i]); + } else { + throw RangeError(); + } +} + void GradientMachine::randParameters() { m->machine->randParameters(); } Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h index 1831b8e170..764946cf53 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/api/PaddleAPI.h @@ -768,6 +768,9 @@ public: size_t getParameterSize() const; Parameter* getParameter(size_t i) throw(RangeError); + size_t getNonStaticParameterSize() const; + Parameter* getNonStaticParameter(size_t i) throw(RangeError); + void randParameters(); Arguments* getLayerOutput(const std::string& layerName) const diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py index a708def1d2..fb337b8af3 100644 --- a/paddle/py_paddle/util.py +++ b/paddle/py_paddle/util.py @@ -195,6 +195,12 @@ def __monkeypatch_gradient_machine__(): swig_paddle.GradientMachine.getParameters = getParameters + 
def getNonStaticParameters(self): + return (self.getNonStaticParameter(i) + for i in xrange(self.getNonStaticParameterSize())) + + swig_paddle.GradientMachine.getParameters = getParameters + def getLayerOutputs(self, layerNames): """ getLayerOutputs. get outputs of layers and return a numpy matrix dict. From ce3a399d3d5654804c8f258cb4b2d0455e013606 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 2 Mar 2017 14:54:56 +0800 Subject: [PATCH 23/31] update util.py --- paddle/py_paddle/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py index fb337b8af3..1c9455fab5 100644 --- a/paddle/py_paddle/util.py +++ b/paddle/py_paddle/util.py @@ -199,7 +199,7 @@ def __monkeypatch_gradient_machine__(): return (self.getNonStaticParameter(i) for i in xrange(self.getNonStaticParameterSize())) - swig_paddle.GradientMachine.getParameters = getParameters + swig_paddle.GradientMachine.getNonStaticParameters = getNonStaticParameters def getLayerOutputs(self, layerNames): """ From c9bb48b308807f80b3ba238cafb97ba4b0eda983 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 2 Mar 2017 15:09:26 +0800 Subject: [PATCH 24/31] support calculate size --- python/paddle/v2/config_base.py | 7 +- python/paddle/v2/layer.py | 110 ++++++++++++++++++++------------ 2 files changed, 75 insertions(+), 42 deletions(-) diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py index be3e39a06e..573539a30c 100644 --- a/python/paddle/v2/config_base.py +++ b/python/paddle/v2/config_base.py @@ -22,7 +22,7 @@ class Layer(object): def __init__(self, name=None, size=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name - self.size = size + self.__contex__ = {} self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -44,7 +44,7 @@ class Layer(object): return self.to_proto_impl(**kwargs) elif self.context_name() not in context: context[self.context_name()] = self.to_proto_impl(**kwargs) - + self.__contex__ = context if self.use_context_name(): return context[self.context_name()] else: @@ -64,6 +64,9 @@ class Layer(object): def use_context_name(self): return False + def calcalted_size(self): + return self.__contex__[self.context_name()].size + def __convert_to_v2__(method_name, parent_names, is_default_name=True): if is_default_name: diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index e24244a48c..a97518ed52 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -197,6 +197,10 @@ class MemoryV2(WithExtraParent): val = locs[key] if isinstance(val, RecurrentLayerInput): begin_of_current_rnn.append(val) + elif isinstance(val, collections.Sequence): + for v in val: + if isinstance(v, RecurrentLayerInput): + begin_of_current_rnn.append(v) if begin_of_current_rnn: break @@ -216,7 +220,13 @@ class MemoryV2(WithExtraParent): if self.__boot_layer_name__ is not None: args['boot_layer'] = context[self.__boot_layer_name__] - return conf_helps.memory(name=self.name, size=self.size, **args) + + if callable(self.size): + real_size = self.size() + else: + real_size = self.size + args['size'] = real_size + return conf_helps.memory(name=self.name, **args) def context_name(self): return self.name + "#memory" @@ -311,6 +321,12 @@ class MixedLayerV2(Layer): args[each] = kwargs[each] for each in self.__other_kwargs__: args[each] = self.__other_kwargs__[each] + size = args.get('size', None) + if callable(size): + real_size = size() + else: + real_size = size + args['size'] = real_size 
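+        # Note: by this point args['size'] is a plain integer even when
+        # the caller handed in a callable (the lazy-size pattern used by
+        # recurrent_group for StaticInput memories), so the v1 layer
+        # below is always constructed with a concrete size.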
return getattr(conf_helps, self.__method_name__)(**args) @@ -363,53 +379,15 @@ class RecurrentLayerOutput(Layer): RecurrentLayerGroupEnd(name=self.__recurrent_name__) -@wrap_name_default() -def recurrent_group(step, input, name=None): - if not isinstance(input, collections.Sequence): - input = [input] - - # TODO(qiaolongfei) convert StaticInput to memory according to v2 recurrent_group - for i in xrange(len(input)): - cur_input = input[i] - if isinstance(cur_input, StaticInputV2): - input[i] = cur_input.input - - actual_input = [ - RecurrentLayerInput( - recurrent_name=name, - index=i, - parent_layers={'recurrent_inputs': input}) - for i in xrange(len(input)) - ] - - actual_output = step(*actual_input) - - if not isinstance(actual_output, collections.Sequence): - actual_output = [actual_output] - - retv = [ - RecurrentLayerOutput( - recurrent_name=name, - index=i, - parent_layers={'recurrent_outputs': actual_output}) - for i in xrange(len(actual_output)) - ] - if len(retv) == 1: - return retv[0] - else: - return retv - - LayerV2 = Layer data = DataLayerV2 AggregateLevel = conf_helps.layers.AggregateLevel ExpandLevel = conf_helps.layers.ExpandLevel -recurrent_group = recurrent_group memory = MemoryV2 def __layer_name_mapping__(inname): - if inname in ['data_layer', 'memory', 'mixed_layer']: + if inname in ['data_layer', 'memory', 'mixed_layer', 'recurrent_group']: # Do Not handle these layers return elif inname == 'maxid_layer': @@ -469,3 +447,55 @@ operator_list = [ for op in operator_list: globals()[op[0]] = __convert_to_v2__( op[0], parent_names=op[1], is_default_name=False) + + +@wrap_name_default() +def recurrent_group(step, input, name=None): + if not isinstance(input, collections.Sequence): + input = [input] + + non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2), + input) + actual_input = [ + RecurrentLayerInput( + recurrent_name=name, + index=i, + parent_layers={'recurrent_inputs': non_static_inputs}) + for i in xrange(len(non_static_inputs)) + ] + + def __real_step__(*args): + rnn_input = list(args) + static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input) + for static_input in static_inputs: + mem_name = "__%s_memory__" % static_input.input.name + print memory + mem = memory( + name=mem_name, + is_seq=static_input.is_seq, + size=static_input.input.calcalted_size, + boot_layer=static_input.input) + with mixed( + name=mem_name, + size=static_input.input.calcalted_size, + act=activation.Identity()) as mix: + mix += identity_projection(input=mem) + rnn_input.insert(input.index(static_input), mix) + return step(*rnn_input) + + actual_output = __real_step__(*actual_input) + + if not isinstance(actual_output, collections.Sequence): + actual_output = [actual_output] + + retv = [ + RecurrentLayerOutput( + recurrent_name=name, + index=i, + parent_layers={'recurrent_outputs': actual_output}) + for i in xrange(len(actual_output)) + ] + if len(retv) == 1: + return retv[0] + else: + return retv From 69bf77fd1e71fc57bf0f15820a9dd34bd98c79b6 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 2 Mar 2017 15:09:49 +0800 Subject: [PATCH 25/31] fix trainer v2 getNonStaticParameters --- python/paddle/v2/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index bf8b181e42..44ba9d7ae1 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -113,7 +113,7 @@ class SGD(ITrainer): gm.forwardBackward(feeder(data_batch), out_args, pass_type) gm.eval(pass_evaluator) 
gm.eval(batch_evaluator) - for each_param in gm.getParameters(): + for each_param in gm.getNonStaticParameters(): updater.update(each_param) # Get cost. We use numpy to calculate total cost for this batch. cost_vec = out_args.getSlotValue(0) From 1164c287b9db46abd9e591ddebe720bc3e08e22d Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 2 Mar 2017 15:14:34 +0800 Subject: [PATCH 26/31] add datasets import --- python/paddle/v2/dataset/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index 9647e98503..d222739ba2 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -1,3 +1,7 @@ import mnist +import imikolov +import imdb +import cifar +import movielens -__all__ = ['mnist'] +__all__ = ['mnist', 'imikolov', 'imdb', 'cifar', 'movielens'] From f9e6aa2c31aa6bc5269cd66eaa8705b0b98af989 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 2 Mar 2017 15:23:19 +0800 Subject: [PATCH 27/31] refine code --- python/paddle/v2/config_base.py | 12 ++++++---- python/paddle/v2/layer.py | 40 +++++++++++++++++---------------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py index 573539a30c..fa2ccec6c3 100644 --- a/python/paddle/v2/config_base.py +++ b/python/paddle/v2/config_base.py @@ -19,7 +19,7 @@ import paddle.trainer_config_helpers as conf_helps class Layer(object): - def __init__(self, name=None, size=None, parent_layers=None): + def __init__(self, name=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name self.__contex__ = {} @@ -64,7 +64,12 @@ class Layer(object): def use_context_name(self): return False - def calcalted_size(self): + def calculate_size(self): + """ + lazy calculate size of the layer, should be called when to_proto_impl of + this layer is called. 
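+
+        A hypothetical use: pass the bound method itself, e.g.
+        `size=some_layer.calculate_size`, to another layer, and the
+        integer is looked up only after `some_layer` has been converted
+        to proto, when its size is actually known.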
+ :return: + """ return self.__contex__[self.context_name()].size @@ -87,8 +92,7 @@ def __convert_to_v2__(method_name, parent_names, is_default_name=True): other_kwargs[key] = kwargs[key] name = kwargs.get('name', None) - size = kwargs.get('size', None) - super(V2LayerImpl, self).__init__(name, size, parent_layers) + super(V2LayerImpl, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs if wrapper is not None: diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index a97518ed52..0d8b59cfd2 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -139,10 +139,10 @@ class WithExtraParent(Layer): def extra_parent(self): return self.__extra_parent__ - def __init__(self, name=None, size=None, parent_layers=None): + def __init__(self, name=None, parent_layers=None): self.__extra_parent__ = [] super(WithExtraParent, self).__init__( - name=name, size=size, parent_layers=parent_layers) + name=name, parent_layers=parent_layers) def append_extra_parent(self, parent): self.__extra_parent__.append(parent) @@ -178,11 +178,9 @@ class WithExtraParent(Layer): class MemoryV2(WithExtraParent): - def __init__(self, name, size, **kwargs): + def __init__(self, name, **kwargs): self.name = name - self.size = size - super(MemoryV2, self).__init__( - name=name, size=size, parent_layers=dict()) + super(MemoryV2, self).__init__(name=name, parent_layers=dict()) self.__kwargs__ = kwargs self.__boot_layer_name__ = None if 'boot_layer' in kwargs: @@ -221,11 +219,14 @@ class MemoryV2(WithExtraParent): if self.__boot_layer_name__ is not None: args['boot_layer'] = context[self.__boot_layer_name__] - if callable(self.size): - real_size = self.size() - else: - real_size = self.size - args['size'] = real_size + size = args.get('size', None) + if size is not None: + if callable(size): + real_size = size() + else: + real_size = size + print(real_size) + args['size'] = real_size return conf_helps.memory(name=self.name, **args) def context_name(self): @@ -298,7 +299,7 @@ class MixedLayerV2(Layer): other_kwargs['bias_attr'] = bias_attr other_kwargs['layer_attr'] = layer_attr parent_layers = {"input": self.__inputs__} - super(MixedLayerV2, self).__init__(name, size, parent_layers) + super(MixedLayerV2, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs def __iadd__(self, other): @@ -322,11 +323,12 @@ class MixedLayerV2(Layer): for each in self.__other_kwargs__: args[each] = self.__other_kwargs__[each] size = args.get('size', None) - if callable(size): - real_size = size() - else: - real_size = size - args['size'] = real_size + if size is not None: + if callable(size): + real_size = size() + else: + real_size = size + args['size'] = real_size return getattr(conf_helps, self.__method_name__)(**args) @@ -473,11 +475,11 @@ def recurrent_group(step, input, name=None): mem = memory( name=mem_name, is_seq=static_input.is_seq, - size=static_input.input.calcalted_size, + size=static_input.input.calculate_size, boot_layer=static_input.input) with mixed( name=mem_name, - size=static_input.input.calcalted_size, + size=static_input.input.calculate_size, act=activation.Identity()) as mix: mix += identity_projection(input=mem) rnn_input.insert(input.index(static_input), mix) From bb66f24334eff70a045c75ef9ff5a22b77c27e81 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 2 Mar 2017 15:32:01 +0800 Subject: [PATCH 28/31] remove debug code --- python/paddle/v2/layer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py 
index 0d8b59cfd2..2f55611aaa 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -225,7 +225,6 @@ class MemoryV2(WithExtraParent): real_size = size() else: real_size = size - print(real_size) args['size'] = real_size return conf_helps.memory(name=self.name, **args) @@ -471,7 +470,6 @@ def recurrent_group(step, input, name=None): static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input) for static_input in static_inputs: mem_name = "__%s_memory__" % static_input.input.name - print memory mem = memory( name=mem_name, is_seq=static_input.is_seq, From 6d09f70a860f253e00f91685eb73693e3eef5a76 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 2 Mar 2017 15:43:13 +0800 Subject: [PATCH 29/31] Add event_handler test and comment --- demo/image_classification/api_v2_train.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py index 0b4dc4d929..94bf0b5db4 100644 --- a/demo/image_classification/api_v2_train.py +++ b/demo/image_classification/api_v2_train.py @@ -12,27 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License +import sys +import paddle.v2 as paddle from api_v2_vgg import vgg_bn_drop from api_v2_resnet import resnet_cifar10 -import paddle.v2 as paddle +# End batch and end pass event handler def event_handler(event): if isinstance(event, paddle.event.EndIteration): if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id, - event.cost) + print "\nPass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + else: + sys.stdout.write('.') + sys.stdout.flush() + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=paddle.reader.batched( + paddle.dataset.cifar.test10(), batch_size=128), + reader_dict={'image': 0, + 'label': 1}) + print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) def main(): datadim = 3 * 32 * 32 classdim = 10 + # PaddlePaddle init paddle.init(use_gpu=True, trainer_count=1) image = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(datadim)) + # Add neural network config # option 1. resnet net = resnet_cifar10(image, depth=32) # option 2. 
vgg
@@ -46,8 +60,10 @@ def main():
         name="label", type=paddle.data_type.integer_value(classdim))
     cost = paddle.layer.classification_cost(input=out, label=lbl)
 
+    # Create parameters
     parameters = paddle.parameters.create(cost)
 
+    # Create optimizer
     momentum_optimizer = paddle.optimizer.Momentum(
         momentum=0.9,
         regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
@@ -57,6 +73,7 @@ def main():
         learning_rate_schedule='discexp',
         batch_size=128)
 
+    # Create trainer
     trainer = paddle.trainer.SGD(cost=cost,
                                  parameters=parameters,
                                  update_equation=momentum_optimizer)

From 69ac20c2845fa0bb988407a4cd3af7af1aaa7d0a Mon Sep 17 00:00:00 2001
From: liaogang
Date: Thu, 2 Mar 2017 16:53:31 +0800
Subject: [PATCH 30/31] Fix event_handler trainer

---
 demo/image_classification/api_v2_train.py | 35 +++++++++++------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py
index 94bf0b5db4..585f61c6fa 100644
--- a/demo/image_classification/api_v2_train.py
+++ b/demo/image_classification/api_v2_train.py
@@ -18,24 +18,6 @@ from api_v2_vgg import vgg_bn_drop
 from api_v2_resnet import resnet_cifar10
 
 
-# End batch and end pass event handler
-def event_handler(event):
-    if isinstance(event, paddle.event.EndIteration):
-        if event.batch_id % 100 == 0:
-            print "\nPass %d, Batch %d, Cost %f, %s" % (
-                event.pass_id, event.batch_id, event.cost, event.metrics)
-        else:
-            sys.stdout.write('.')
-            sys.stdout.flush()
-    if isinstance(event, paddle.event.EndPass):
-        result = trainer.test(
-            reader=paddle.reader.batched(
-                paddle.dataset.cifar.test10(), batch_size=128),
-            reader_dict={'image': 0,
-                         'label': 1})
-        print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
-
-
 def main():
     datadim = 3 * 32 * 32
     classdim = 10
@@ -73,6 +55,23 @@ def main():
         learning_rate_schedule='discexp',
         batch_size=128)
 
+    # End batch and end pass event handler
+    def event_handler(event):
+        if isinstance(event, paddle.event.EndIteration):
+            if event.batch_id % 100 == 0:
+                print "\nPass %d, Batch %d, Cost %f, %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics)
+            else:
+                sys.stdout.write('.')
+                sys.stdout.flush()
+        if isinstance(event, paddle.event.EndPass):
+            result = trainer.test(
+                reader=paddle.reader.batched(
+                    paddle.dataset.cifar.test10(), batch_size=128),
+                reader_dict={'image': 0,
+                             'label': 1})
+            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
+
     # Create trainer
     trainer = paddle.trainer.SGD(cost=cost,
                                  parameters=parameters,

From edce6c8b6ab23c9c7fea1dee75d46fb2bb0f3e31 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Thu, 2 Mar 2017 17:51:53 +0800
Subject: [PATCH 31/31] restore embedding_layer name to embedding

---
 python/paddle/trainer_config_helpers/layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 2b95c2ed0f..b68460b6a3 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -822,7 +822,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None):
     return LayerOutput(name, LayerType.DATA, size=size)
 
 
-@wrap_name_default("embedding_layer")
+@wrap_name_default("embedding")
 @wrap_param_attr_default()
 @layer_support(ERROR_CLIPPING)
 def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None):
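
A minimal usage sketch of the v2 recurrent API that this series
converges on (illustrative only: dict_dim, word_dim and hidden_dim are
placeholder sizes, and reader/training setup is omitted):

    import paddle.v2 as paddle

    data = paddle.layer.data(
        name="word", type=paddle.data_type.integer_value(dict_dim))
    emb = paddle.layer.embedding(input=data, size=word_dim)

    def step(y):
        # the memory shares the name "rnn_state" with the fc output, so
        # the state produced at step t is fed back at step t + 1
        mem = paddle.layer.memory(name="rnn_state", size=hidden_dim)
        return paddle.layer.fc(input=[y, mem],
                               size=hidden_dim,
                               act=paddle.activation.Tanh(),
                               bias_attr=True,
                               name="rnn_state")

    rnn_out = paddle.layer.recurrent_group(name="rnn", step=step, input=emb)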