From d23ea4ef8ebc637534c5abafca995be257f83751 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 12 Jan 2018 16:47:57 +0800 Subject: [PATCH 01/22] add gradient clip by norm --- python/paddle/v2/fluid/clip.py | 12 ++++++++++++ python/paddle/v2/fluid/layers/ops.py | 1 + 2 files changed, 13 insertions(+) diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py index b1fd1c2b65..eb75018d77 100644 --- a/python/paddle/v2/fluid/clip.py +++ b/python/paddle/v2/fluid/clip.py @@ -77,6 +77,18 @@ class GradientClipByValue(BaseGradientClipAttr): return param, new_grad +class GradientClipByNorm(BaseGradientClipAttr): + def __init__(self, clip_norm): + self.clip_norm = clip_norm + + def process_context(self, context, p_g): + pass + + def create_operators(self, param, grad): + new_grad = layers.clip_by_norm(x=grad, max_norm=self.clip_norm) + return param, new_grad + + def append_gradient_clip_ops(param_grad): context = dict() create_op_callbacks = [] diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index d3a5b70785..884e84011d 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -16,6 +16,7 @@ __all__ = [ 'elementwise_sub', 'elementwise_mul', 'clip', + 'clip_by_norm', 'sequence_softmax', ] + __activations__ From adc26dffa9dac81bd93c88d70f0ab66fcdcc81f0 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 15 Jan 2018 10:36:09 +0800 Subject: [PATCH 02/22] developing GradientClipByGlobalNorm --- python/paddle/v2/fluid/clip.py | 54 ++++++++++++++++++++++++---- python/paddle/v2/fluid/layers/ops.py | 20 ++++------- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py index eb75018d77..f0904e18ea 100644 --- a/python/paddle/v2/fluid/clip.py +++ b/python/paddle/v2/fluid/clip.py @@ -1,5 +1,6 @@ import functools import layers +from framework import Variable from . 
import core

 __all__ = [
@@ -44,7 +45,7 @@ def error_clip_callback(block, context):


 class BaseGradientClipAttr(object):
-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         raise NotImplementedError()

     def create_operators(self, param, grad):
@@ -52,7 +53,7 @@ class BaseGradientClipAttr(object):


 class NullGradientClipAttr(BaseGradientClipAttr):
-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         pass

     def create_operators(self, param, grad):
@@ -69,7 +70,7 @@ class GradientClipByValue(BaseGradientClipAttr):
         self.max = max
         self.min = min

-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         pass

     def create_operators(self, param, grad):
@@ -81,7 +82,7 @@ class GradientClipByNorm(BaseGradientClipAttr):
     def __init__(self, clip_norm):
         self.clip_norm = clip_norm

-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         pass

     def create_operators(self, param, grad):
@@ -89,6 +90,46 @@ class GradientClipByNorm(BaseGradientClipAttr):
         return param, new_grad


+class GradientClipByGlobalNorm(BaseGradientClipAttr):
+    global_norm_var = None
+    clip_norm_var = None
+    ratio_var = None
+
+    @classmethod
+    def init(cls, clip_norm):
+        cls.global_norm_var = layers.fill_constant(
+            shape=[1], dtype="float32", value=0.0)
+        cls.clip_norm_var = layers.fill_constant(
+            shape=[1], dtype="float32", value=clip_norm)
+
+    def __init__(self):
+        if not (isinstance(self.__class__.global_norm_var, Variable) and
+                isinstance(self.__class__.clip_norm_var, Variable)):
+            raise ValueError(
+                "Class 'GradientClipByGlobalNorm' has not been properly initialized. Please call GradientClipByGlobalNorm.init() first."
+            )
+
+    def process_context(self, context, param, grad):
+        local_norm_var = layers.reduce_sum(
+            x=layers.pow(x=grad, factor=2), reduce_all=True)
+        layers.sums(
+            input=[local_norm_var, self.__class__.global_norm_var],
+            out=[self.__class__.global_norm_var])
+
+    def create_operators(self, param, grad):
+        if self.__class__.ratio_var is None:
+            self.__class__.global_norm_var = layers.sqrt(
+                x=self.__class__.global_norm_var)
+            self.__class__.ratio_var = layers.elementwise_div(
+                x=self.__class__.clip_norm_var,
+                y=layers.elementwise_max(
+                    x=self.__class__.clip_norm_var,
+                    y=self.__class__.global_norm_var))
+        # the elementwise_max op is still missing
+        # ratio_var cannot be fed to scale_op yet
+        # new_grad = layers.
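[Editor's note: the two TODO comments above record that, at this point in the series, the elementwise_max op and a way to feed ratio_var into scale_op were still missing. For orientation, the computation this class is building toward is the standard global-norm clipping rule. A minimal NumPy sketch of that rule (illustrative only, not part of the patch):

    import numpy as np

    def clip_by_global_norm(grads, clip_norm):
        # global_norm is the L2 norm of all gradients taken together
        global_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
        # ratio <= 1.0; gradients are left untouched when the norm is small
        ratio = clip_norm / max(clip_norm, global_norm)
        return [g * ratio for g in grads]

The fluid code expresses the same computation with the fill_constant, reduce_sum, sums, sqrt, elementwise_div and elementwise_max ops.]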
+
+
+
 def append_gradient_clip_ops(param_grad):
     context = dict()
     create_op_callbacks = []
@@ -98,10 +139,9 @@ def append_gradient_clip_ops(param_grad):
             clip_attr = NullGradientClipAttr()
         if not isinstance(clip_attr, BaseGradientClipAttr):
             raise TypeError(
-                "clip attribute should be an instance of BaseGradientClippingAttr"
-            )
+                "clip attribute should be an instance of BaseGradientClipAttr")

-        clip_attr.process_context(context=context, p_g=param_grad)
+        clip_attr.process_context(context=context, param=p, grad=g)
         create_op_callbacks.append(
             functools.partial(
                 clip_attr.create_operators, param=p, grad=g))
diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py
index 884e84011d..021b87828f 100644
--- a/python/paddle/v2/fluid/layers/ops.py
+++ b/python/paddle/v2/fluid/layers/ops.py
@@ -1,23 +1,15 @@
 from ..registry import register_layer

 __activations__ = [
-    'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round'
+    'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round',
+    'pow'
 ]

 __all__ = [
-    'mean',
-    'mul',
-    'reshape',
-    'scale',
-    'transpose',
-    'sigmoid_cross_entropy_with_logits',
-    'elementwise_add',
-    'elementwise_div',
-    'elementwise_sub',
-    'elementwise_mul',
-    'clip',
-    'clip_by_norm',
-    'sequence_softmax',
+    'mean', 'mul', 'reshape', 'scale', 'transpose',
+    'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div',
+    'elementwise_sub', 'elementwise_mul', 'clip', 'clip_by_norm',
+    'sequence_softmax', 'reduce_sum'
 ] + __activations__

 for _OP in set(__all__):

From f189ad74426cf0970bd05016d4a2827ea6c1ea00 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 17 Jan 2018 17:27:54 +0800
Subject: [PATCH 03/22] refine the definition of class GradientClipByGlobalNorm

---
 python/paddle/v2/fluid/clip.py | 47 +++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index f0904e18ea..fcdd4c29e4 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -93,41 +93,48 @@ class GradientClipByNorm(BaseGradientClipAttr):
 class GradientClipByGlobalNorm(BaseGradientClipAttr):
     global_norm_var = None
     clip_norm_var = None
-    ratio_var = None
+    scale_var = None

     @classmethod
     def init(cls, clip_norm):
+        if not (isinstance(clip_norm, int) or isinstance(clip_norm, float)):
+            raise TypeError("The 'clip_norm' must be a value of int or float")
+
         cls.global_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=0.0)
         cls.clip_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=clip_norm)

-    def __init__(self):
-        if not (isinstance(self.__class__.global_norm_var, Variable) and
-                isinstance(self.__class__.clip_norm_var, Variable)):
+    @classmethod
+    def check_init(cls):
+        if not (isinstance(cls.global_norm_var, Variable) and
+                isinstance(cls.clip_norm_var, Variable)):
             raise ValueError(
-                "Class 'GradientClipByGlobalNorm' has not been properly initialized. Please call GradientClipByGlobalNorm.init() first."
-            )
+                "Class 'GradientClipByGlobalNorm' has not been properly initialized. \
+                Please call GradientClipByGlobalNorm.init() first.")
+
+    @classmethod
+    def process_context(cls, context, param, grad):
+        cls.check_init()

-    def process_context(self, context, param, grad):
         local_norm_var = layers.reduce_sum(
             x=layers.pow(x=grad, factor=2), reduce_all=True)
         layers.sums(
-            input=[local_norm_var, self.__class__.global_norm_var],
-            out=[self.__class__.global_norm_var])
+            input=[local_norm_var, cls.global_norm_var],
+            out=[cls.global_norm_var])

-    def create_operators(self, param, grad):
-        if self.__class__.ratio_var is None:
-            self.__class__.global_norm_var = layers.sqrt(
-                x=self.__class__.global_norm_var)
-            self.__class__.ratio_var = layers.elementwise_div(
-                x=self.__class__.clip_norm_var,
+    @classmethod
+    def create_operators(cls, param, grad):
+        cls.check_init()
+
+        if cls.scale_var is None:
+            cls.global_norm_var = layers.sqrt(x=cls.global_norm_var)
+            cls.scale_var = layers.elementwise_div(
+                x=cls.clip_norm_var,
                 y=layers.elementwise_max(
-                    x=self.__class__.clip_norm_var,
-                    y=self.__class__.global_norm_var))
-        # the elementwise_max op is still missing
-        # ratio_var cannot be fed to scale_op yet
-        # new_grad = layers.
+                    x=cls.clip_norm_var, y=cls.global_norm_var))
+        new_grad = layers.elementwise_mul(x=grad, y=cls.scale_var)
+        return param, new_grad


 def append_gradient_clip_ops(param_grad):

From 4cb6e72b85fef0205a3d3ebfd136e11c009e39f6 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 17 Jan 2018 18:43:54 +0800
Subject: [PATCH 04/22] refine code details

---
 python/paddle/v2/fluid/clip.py                  | 18 +++++++--------
 python/paddle/v2/fluid/framework.py             |  2 +-
 python/paddle/v2/fluid/param_attr.py            | 22 +++++++++----------
 .../tests/book/test_recognize_digits_mlp.py     | 18 +++++++--------
 4 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index d8240dc155..f7917fc142 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -1,16 +1,16 @@
 # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
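[Editor's note: after the refinement above, all of GradientClipByGlobalNorm's state lives in class attributes, so the intended call pattern is a one-time init() followed by attaching instances to parameters. A hypothetical usage sketch under that assumption (the ParamAttr keyword is still named clip at this point; the patch below renames it to gradient_clip):

    import paddle.v2.fluid as fluid

    # one-time, per-program initialization of the shared class state
    fluid.clip.GradientClipByGlobalNorm.init(clip_norm=5.0)
    # each participating parameter gets its own instance of the attribute;
    # `image` is assumed to be a previously defined input variable
    w_attr = fluid.ParamAttr(clip=fluid.clip.GradientClipByGlobalNorm())
    fc = fluid.layers.fc(input=image, size=128, param_attr=w_attr)
]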
import functools import layers from framework import Variable @@ -162,7 +162,7 @@ def append_gradient_clip_ops(param_grad): context = dict() create_op_callbacks = [] for p, g in param_grad: - clip_attr = getattr(p, 'clip_attr', NullGradientClipAttr()) + clip_attr = getattr(p, 'gradient_clip_attr', NullGradientClipAttr()) if clip_attr is None: clip_attr = NullGradientClipAttr() if not isinstance(clip_attr, BaseGradientClipAttr): diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 8042febfed..9128a0eebe 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -946,7 +946,7 @@ class Parameter(Variable): self.regularizer = kwargs.get('regularizer', None) - self.clip_attr = kwargs.get('clip_attr', None) + self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None) # program is a global instance. diff --git a/python/paddle/v2/fluid/param_attr.py b/python/paddle/v2/fluid/param_attr.py index 3af0190590..8c8de0d104 100644 --- a/python/paddle/v2/fluid/param_attr.py +++ b/python/paddle/v2/fluid/param_attr.py @@ -1,16 +1,16 @@ # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from initializer import Initializer, Xavier, Constant from regularizer import WeightDecayRegularizer @@ -24,13 +24,13 @@ class ParamAttr(object): learning_rate=1.0, regularizer=None, trainable=True, - clip=None): + gradient_clip=None): self.name = name self.initializer = initializer self.learning_rate = learning_rate self.regularizer = regularizer self.trainable = trainable - self.clip = clip + self.gradient_clip = gradient_clip def set_default_initializer(self, initializer): if initializer is None: @@ -76,7 +76,7 @@ class ParamAttr(object): }, 'regularizer': self.regularizer, 'trainable': self.trainable, - 'clip_attr': self.clip + 'gradient_clip_attr': self.gradient_clip } if with_initializer: kwargs['initializer'] = self.initializer diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index 02da2fcc85..e614e5e3f1 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -1,16 +1,16 @@ # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import print_function import numpy as np import paddle.v2 as paddle @@ -26,7 +26,7 @@ hidden1 = fluid.layers.fc(input=image, act='relu', param_attr=fluid.ParamAttr( regularizer=regularizer, - clip=fluid.clip.ClipByValue(10))) + gradient_clip=fluid.clip.ClipByValue(10))) hidden2 = fluid.layers.fc(input=hidden1, size=64, From 6ebfade465be5526939b52b0d251486298c4c734 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 17 Jan 2018 19:14:05 +0800 Subject: [PATCH 05/22] fix copyright information --- paddle/gserver/tests/sequence_recurrent_group.py | 13 +++++++++++++ .../paddle/v2/fluid/tests/test_edit_distance_op.py | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/paddle/gserver/tests/sequence_recurrent_group.py b/paddle/gserver/tests/sequence_recurrent_group.py index a1d54542e3..1343f2956f 100644 --- a/paddle/gserver/tests/sequence_recurrent_group.py +++ b/paddle/gserver/tests/sequence_recurrent_group.py @@ -1,3 +1,16 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. #!/usr/bin/env python # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # diff --git a/python/paddle/v2/fluid/tests/test_edit_distance_op.py b/python/paddle/v2/fluid/tests/test_edit_distance_op.py index 38e87728b3..cf118df634 100644 --- a/python/paddle/v2/fluid/tests/test_edit_distance_op.py +++ b/python/paddle/v2/fluid/tests/test_edit_distance_op.py @@ -1,3 +1,16 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
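[Editor's note: with PATCH 04 applied, gradient clipping is configured through the renamed ParamAttr keyword, as the MLP test change above shows. A minimal sketch combining the renamed keyword with the per-parameter norm clip added in PATCH 01 (illustrative only; `regularizer` and `image` are assumed to be defined as in the test file):

    param_attr = fluid.ParamAttr(
        regularizer=regularizer,
        gradient_clip=fluid.clip.GradientClipByNorm(clip_norm=1.0))
    hidden = fluid.layers.fc(input=image, size=128, act='relu',
                             param_attr=param_attr)
]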
 import unittest
 import numpy as np
 from op_test import OpTest

From 1dac173b518faeb8f31c321a61fa287b8de4246e Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 17 Jan 2018 20:15:03 +0800
Subject: [PATCH 06/22] add API for clip_by_global_norm

---
 python/paddle/v2/fluid/clip.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index f7917fc142..d1e6987e01 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import functools
 import layers
-from framework import Variable
+import framework
 from . import core

 __all__ = [
@@ -128,8 +128,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):

     @classmethod
     def check_init(cls):
-        if not (isinstance(cls.global_norm_var, Variable) and
-                isinstance(cls.clip_norm_var, Variable)):
+        if not (isinstance(cls.global_norm_var, framework.Variable) and
+                isinstance(cls.clip_norm_var, framework.Variable)):
             raise ValueError(
                 "Class 'GradientClipByGlobalNorm' has not been properly initialized. \
                 Please call GradientClipByGlobalNorm.init() first.")
@@ -158,6 +158,23 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
         return param, new_grad


+def gradient_clip_by_global_norm(clip_norm, param_list=None, program=None):
+    if program is None:
+        program = framework.default_main_program()
+    if param_list is None:
+        param_list = program.block(0).all_parameters()
+    if all(isinstance(elem, basestring) for elem in param_list):
+        param_list = [program.block(0).var(elem) for elem in param_list]
+    if not all(isinstance(elem, framework.Parameter) for elem in param_list):
+        raise TypeError(
+            "'param_list' should be a list of Parameter or basestring(parameter's name)."
+        )
+
+    GradientClipByGlobalNorm.init(clip_norm)
+    for param in param_list:
+        param.gradient_clip_attr = GradientClipByGlobalNorm()
+
+
 def append_gradient_clip_ops(param_grad):
     context = dict()
     create_op_callbacks = []

From 958d07bee3343288f9813693b5a85150a5131cdd Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 17 Jan 2018 20:21:05 +0800
Subject: [PATCH 07/22] fix an error

---
 python/paddle/v2/fluid/framework.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py
index 9128a0eebe..91fdb5fa7e 100644
--- a/python/paddle/v2/fluid/framework.py
+++ b/python/paddle/v2/fluid/framework.py
@@ -777,7 +777,7 @@ class Block(object):
                 trainable=p.trainable,
                 optimize_attr=p.optimize_attr,
                 regularizer=p.regularizer,
-                clip_attr=p.clip_attr,
+                gradient_clip_attr=p.gradient_clip_attr,
                 error_clip=p.error_clip,
                 name=v.name)
             self.vars[new_p.name] = new_p

From a247972ddad05490a7b72911521bff0b48cf2d1c Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 17 Jan 2018 20:31:05 +0800
Subject: [PATCH 08/22] fix an error

---
 python/paddle/v2/fluid/clip.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index d1e6987e01..7a36df0dab 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -134,8 +134,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
                 "Class 'GradientClipByGlobalNorm' has not been properly initialized. \
                 Please call GradientClipByGlobalNorm.init() first.")

-    @classmethod
-    def process_context(cls, context, param, grad):
+    def process_context(self, context, param, grad):
+        cls = self.__class__
         cls.check_init()

         local_norm_var = layers.reduce_sum(
@@ -144,8 +144,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
             input=[local_norm_var, cls.global_norm_var],
             out=[cls.global_norm_var])

-    @classmethod
-    def create_operators(cls, param, grad):
+    def create_operators(self, param, grad):
+        cls = self.__class__
         cls.check_init()

         if cls.scale_var is None:

From 773f2f735c235afcc6ea40ddc2af23fe7a69a2e9 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Thu, 18 Jan 2018 21:06:51 +0800
Subject: [PATCH 09/22] fix errors

---
 python/paddle/v2/fluid/clip.py       | 5 +++--
 python/paddle/v2/fluid/layers/ops.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index 7a36df0dab..d4f025a4af 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -138,8 +138,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
         cls = self.__class__
         cls.check_init()

-        local_norm_var = layers.reduce_sum(
-            x=layers.pow(x=grad, factor=2), reduce_all=True)
+        local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0))
         layers.sums(
             input=[local_norm_var, cls.global_norm_var],
             out=[cls.global_norm_var])
@@ -154,6 +153,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
                 x=cls.clip_norm_var,
                 y=layers.elementwise_max(
                     x=cls.clip_norm_var, y=cls.global_norm_var))
+            assert cls.scale_var.shape == (1L, )
+
         new_grad = layers.elementwise_mul(x=grad, y=cls.scale_var)
         return param, new_grad

diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py
index dd3197fc00..a2055c5d7b 100644
--- a/python/paddle/v2/fluid/layers/ops.py
+++ b/python/paddle/v2/fluid/layers/ops.py
@@ -48,7 +48,7 @@ __all__ = [
     'mean', 'mul', 'reshape', 'scale', 'transpose',
     'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div',
     'elementwise_sub', 'elementwise_mul', 'elementwise_max', 'elementwise_min',
-    'clip', 'clip_by_norm', 'sequence_softmax', 'reduce_sum'
+    'clip', 'clip_by_norm', 'sequence_softmax'
 ] + __activations__

 for _OP in set(__all__):

From 9c0b29014cd38c36f6a599b7c6477000db30917d Mon Sep 17 00:00:00 2001
From: Yang Yu
Date: Fri, 19 Jan 2018 12:29:41 +0800
Subject: [PATCH 10/22] Make compare_op reuse elemwise_op_funcs

---
 paddle/operators/compare_op.cc               | 11 +++++-----
 paddle/operators/compare_op.cu               |  4 ----
 paddle/operators/compare_op.h                | 22 ++-----------------
 paddle/operators/elementwise_op_function.h   | 12 +++++-----
 .../paddle/v2/fluid/tests/test_compare_op.py |  2 --
 5 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/paddle/operators/compare_op.cc b/paddle/operators/compare_op.cc
index daa2c193b4..930c295a9c 100644
--- a/paddle/operators/compare_op.cc
+++ b/paddle/operators/compare_op.cc
@@ -39,6 +39,11 @@
 N-dim tensor. X and Y could be any type.  The each element of the Out tensor is
 calculated by %s
 )DOC",
                              comment.type, comment.equation));
+    AddAttr<int>("axis",
+                 "(int, default -1). \
The start dimension index "
+                 "for broadcasting Y onto X.")
+        .SetDefault(-1)
+        .EqualGreaterThan(-1);
   }
 };

@@ -95,11 +100,5 @@ REGISTER_LOGICAL_OP(less_than, "Out = X < Y");
 REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor);
 REGISTER_LOGICAL_OP(less_equal, "Out = X <= Y");
 REGISTER_LOGICAL_KERNEL(less_equal, CPU, paddle::operators::LessEqualFunctor);
-REGISTER_LOGICAL_OP(greater_than, "Out = X > Y");
-REGISTER_LOGICAL_KERNEL(greater_than, CPU,
-                        paddle::operators::GreaterThanFunctor);
-REGISTER_LOGICAL_OP(greater_equal, "Out = X >= Y");
-REGISTER_LOGICAL_KERNEL(greater_equal, CPU,
-                        paddle::operators::GreaterEqualFunctor);
 REGISTER_LOGICAL_OP(equal, "Out = X == Y");
 REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor);
diff --git a/paddle/operators/compare_op.cu b/paddle/operators/compare_op.cu
index 26049271be..f625824dbc 100644
--- a/paddle/operators/compare_op.cu
+++ b/paddle/operators/compare_op.cu
@@ -16,8 +16,4 @@ limitations under the License. */

 REGISTER_LOGICAL_KERNEL(less_than, CUDA, paddle::operators::LessThanFunctor);
 REGISTER_LOGICAL_KERNEL(less_equal, CUDA, paddle::operators::LessEqualFunctor);
-REGISTER_LOGICAL_KERNEL(greater_than, CUDA,
-                        paddle::operators::GreaterThanFunctor);
-REGISTER_LOGICAL_KERNEL(greater_equal, CUDA,
-                        paddle::operators::GreaterEqualFunctor);
 REGISTER_LOGICAL_KERNEL(equal, CUDA, paddle::operators::EqualFunctor);
diff --git a/paddle/operators/compare_op.h b/paddle/operators/compare_op.h
index 567e89c0a7..15e9cfcaab 100644
--- a/paddle/operators/compare_op.h
+++ b/paddle/operators/compare_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include
 #include
 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/elementwise_op_function.h"
 #include "paddle/platform/transform.h"

 namespace paddle {
@@ -33,18 +34,6 @@ struct LessEqualFunctor {
   HOSTDEVICE bool operator()(const T& a, const T& b) const { return a <= b; }
 };

-template <typename T>
-struct GreaterThanFunctor {
-  using ELEM_TYPE = T;
-  HOSTDEVICE bool operator()(const T& a, const T& b) const { return a > b; }
-};
-
-template <typename T>
-struct GreaterEqualFunctor {
-  using ELEM_TYPE = T;
-  HOSTDEVICE bool operator()(const T& a, const T& b) const { return a >= b; }
-};
-
 template <typename T>
 struct EqualFunctor {
   using ELEM_TYPE = T;
@@ -65,14 +54,7 @@ class CompareOpKernel
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     using T = typename Functor::ELEM_TYPE;
-    auto* x = context.Input<framework::Tensor>("X");
-    auto* y = context.Input<framework::Tensor>("Y");
-    auto* out = context.Output<framework::Tensor>("Out");
-    Functor binary_func;
-    platform::Transform<DeviceContext> trans;
-    trans(context.template device_context<DeviceContext>(), x->data<T>(),
-          x->data<T>() + x->numel(), y->data<T>(),
-          out->mutable_data<bool>(context.GetPlace()), binary_func);
+    ElementwiseComputeEx<Functor, DeviceContext, T, bool>(context);
   }
 };

diff --git a/paddle/operators/elementwise_op_function.h b/paddle/operators/elementwise_op_function.h
index db5d30c1af..e6f3e39ece 100644
--- a/paddle/operators/elementwise_op_function.h
+++ b/paddle/operators/elementwise_op_function.h
@@ -176,14 +176,15 @@ class MidWiseTransformIterator
 };
 #endif

-template <typename Functor, typename T, typename DeviceContext>
+template <typename Functor, typename T, typename DeviceContext,
+          typename OutType = T>
 class TransformFunctor {
  public:
   TransformFunctor(const framework::Tensor* x, const framework::Tensor* y,
                    framework::Tensor* z, const DeviceContext& ctx, Functor func)
       : x_(x->data<T>()),
         y_(y->data<T>()),
-        z_(z->mutable_data<T>(ctx.GetPlace())),
+        z_(z->mutable_data<OutType>(ctx.GetPlace())),
         nx_(x->numel()),
         ctx_(ctx),
         func_(func) {}
@@ -208,7 +209,7 @@ class TransformFunctor {
  private:
   const T* x_;
   const T* y_;
-  T* z_;
+  OutType* z_;
   int64_t nx_;
   const DeviceContext& ctx_;
   Functor func_;

@@ -364,7 +365,8 @@ void ElementwiseGradCompute(const framework::ExecutionContext& ctx) {
   }
 }

-template <typename Functor, typename DeviceContext, typename T>
+template <typename Functor, typename DeviceContext, typename T,
+          typename OutType = T>
 void ElementwiseComputeEx(const framework::ExecutionContext& ctx) {
   using Tensor = framework::Tensor;

@@ -372,7 +374,7 @@ void ElementwiseComputeEx(const framework::ExecutionContext& ctx) {
   auto* y = ctx.Input<Tensor>("Y");
   auto* z = ctx.Output<Tensor>("Out");
   z->mutable_data<T>(ctx.GetPlace());
-  TransformFunctor<Functor, T, DeviceContext> functor(
+  TransformFunctor<Functor, T, DeviceContext, OutType> functor(
       x, y, z, ctx.template device_context<DeviceContext>(), Functor());

   auto x_dims = x->dims();
diff --git a/python/paddle/v2/fluid/tests/test_compare_op.py b/python/paddle/v2/fluid/tests/test_compare_op.py
index fbf8921e40..00e781c616 100644
--- a/python/paddle/v2/fluid/tests/test_compare_op.py
+++ b/python/paddle/v2/fluid/tests/test_compare_op.py
@@ -37,8 +37,6 @@ def create_test_class(op_type, typename, callback):
 for _type_name in {'float32', 'float64', 'int32', 'int64'}:
     create_test_class('less_than', _type_name, lambda _a, _b: _a < _b)
     create_test_class('less_equal', _type_name, lambda _a, _b: _a <= _b)
-    create_test_class('greater_than', _type_name, lambda _a, _b: _a > _b)
-    create_test_class('greater_equal', _type_name, lambda _a, _b: _a >= _b)
     create_test_class('equal', _type_name, lambda _a, _b: _a == _b)

 if __name__ == '__main__':

From 42b0748ab4f797902cadad4b5278a4cb9fdea9bd Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Fri, 19 Jan 2018 15:12:39 +0800
Subject: [PATCH 11/22] add unittest

---
 python/paddle/v2/fluid/clip.py                 | 14 +++-
 .../{test_clip.py => test_error_clip.py}       |  0
 .../v2/fluid/tests/test_gradient_clip.py       | 82 +++++++++++++++++++
 3 files changed, 93 insertions(+), 3 deletions(-)
 rename python/paddle/v2/fluid/tests/{test_clip.py => test_error_clip.py} (100%)
 create mode 100644 python/paddle/v2/fluid/tests/test_gradient_clip.py

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index d4f025a4af..f6ff83924f 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -113,6 +113,7 @@ class GradientClipByNorm(BaseGradientClipAttr):

 class GradientClipByGlobalNorm(BaseGradientClipAttr):
     global_norm_var = None
+    local_norm_var = None
     clip_norm_var = None
     scale_var = None

@@ -123,12 +124,18 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):

         cls.global_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=0.0)
+        cls.local_norm_var = framework.default_main_program().current_block(
+        ).create_var(
+            name=framework.unique_name("local_norm"),
+            dtype="float32",
+            persistable=False)
         cls.clip_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=clip_norm)

     @classmethod
     def check_init(cls):
         if not (isinstance(cls.global_norm_var, framework.Variable) and
+                isinstance(cls.local_norm_var, framework.Variable) and
                 isinstance(cls.clip_norm_var, framework.Variable)):
             raise ValueError(
                 "Class 'GradientClipByGlobalNorm' has not been properly initialized. 
\ @@ -138,9 +145,10 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): cls = self.__class__ cls.check_init() - local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0)) + cls.local_norm_var = layers.reduce_sum( + input=layers.pow(x=grad, factor=2.0)) layers.sums( - input=[local_norm_var, cls.global_norm_var], + input=[cls.local_norm_var, cls.global_norm_var], out=[cls.global_norm_var]) def create_operators(self, param, grad): @@ -148,7 +156,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): cls.check_init() if cls.scale_var is None: - cls.global_norm_var = layers.sqrt(x=cls.global_norm_var) + layers.sqrt(x=cls.global_norm_var, out=cls.global_norm_var) cls.scale_var = layers.elementwise_div( x=cls.clip_norm_var, y=layers.elementwise_max( diff --git a/python/paddle/v2/fluid/tests/test_clip.py b/python/paddle/v2/fluid/tests/test_error_clip.py similarity index 100% rename from python/paddle/v2/fluid/tests/test_clip.py rename to python/paddle/v2/fluid/tests/test_error_clip.py diff --git a/python/paddle/v2/fluid/tests/test_gradient_clip.py b/python/paddle/v2/fluid/tests/test_gradient_clip.py new file mode 100644 index 0000000000..4fb7f0b2cb --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_gradient_clip.py @@ -0,0 +1,82 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import numpy as np
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+
+
+def _get_global_param_norm_(params_grads):
+    res = fluid.layers.fill_constant(shape=[1], dtype="float32", value=0.0)
+    for _, grad in params_grads:
+        norm_var = fluid.layers.reduce_sum(
+            input=fluid.layers.pow(x=grad, factor=2.0))
+        fluid.layers.sums(input=[norm_var, res], out=[res])
+    fluid.layers.sqrt(x=res, out=res)
+    return res
+
+
+BATCH_SIZE = 128
+CLIP = 0.5
+prog = fluid.framework.Program()
+
+with fluid.program_guard(main_program=prog):
+    image = fluid.layers.data(name='x', shape=[784], dtype='float32')
+
+    hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
+    hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
+    predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
+
+    label = fluid.layers.data(name='y', shape=[1], dtype='int64')
+
+    cost = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+prog_clip = prog.clone()
+
+avg_cost_clip = prog_clip.block(0).var(avg_cost.name)
+
+p_g = fluid.backward.append_backward(loss=avg_cost)
+p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)
+
+with fluid.program_guard(main_program=prog):
+    global_norm = _get_global_param_norm_(p_g)
+
+with fluid.program_guard(main_program=prog_clip):
+    fluid.clip.gradient_clip_by_global_norm(clip_norm=CLIP)
+    p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
+    global_norm_clip = _get_global_param_norm_(p_g_clip)
+
+train_reader = paddle.batch(
+    paddle.reader.shuffle(
+        paddle.dataset.mnist.train(), buf_size=8192),
+    batch_size=BATCH_SIZE)
+
+place = fluid.CPUPlace()
+exe = fluid.Executor(place)
+feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
+exe.run(fluid.default_startup_program())
+
+count = 0
+for data in train_reader():
+    count += 1
+    if count > 5:
+        break
+    out, = exe.run(prog, feed=feeder.feed(data), fetch_list=[global_norm])
+    out_clip, = exe.run(prog_clip,
+                        feed=feeder.feed(data),
+                        fetch_list=[global_norm_clip])
+
+    if not np.allclose(out_clip, np.minimum(out, np.array([CLIP]))):
+        exit(1)
+exit(0)

From 408a6b8bb2af4f8f075680bb361daad329ad6eca Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Fri, 19 Jan 2018 15:17:35 +0800
Subject: [PATCH 12/22] tiny fix

---
 python/paddle/v2/fluid/clip.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index f6ff83924f..9800ad7c5d 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -124,11 +124,11 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):

         cls.global_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=0.0)
-        cls.local_norm_var = framework.default_main_program().current_block(
-        ).create_var(
-            name=framework.unique_name("local_norm"),
-            dtype="float32",
-            persistable=False)
+        cls.local_norm_var = framework.default_main_program().block(
+            0).create_var(
+                name=framework.unique_name("local_norm"),
+                dtype="float32",
+                persistable=False)
         cls.clip_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=clip_norm)

From 538f1ad28f766c0e47ef4eef2ec59e187ba30f8e Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Fri, 19 Jan 2018 15:24:14 +0800
Subject: [PATCH 13/22] tiny fix

---
 python/paddle/v2/fluid/clip.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index 9800ad7c5d..d97cd9ecc9 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -124,11 +124,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):

         cls.global_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=0.0)
-        cls.local_norm_var = framework.default_main_program().block(
-            0).create_var(
-                name=framework.unique_name("local_norm"),
-                dtype="float32",
-                persistable=False)
+        cls.local_norm_var = layers.create_tensor(dtype="float32")
         cls.clip_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=clip_norm)

From 22662ae4245a26d51b1c0857939934041e5bee56 Mon Sep 17 00:00:00 2001
From: Yang Yu
Date: Fri, 19 Jan 2018 16:52:42 +0800
Subject: [PATCH 14/22] Move paddle.v2.fluid.registry to layers

* registry is only used by layers
* Rename it to layer_function_generator
---
 python/paddle/v2/fluid/layers/control_flow.py | 13 ++++----
 python/paddle/v2/fluid/layers/device.py       |  6 ++--
 .../layer_function_generator.py}              | 29 ++++++++++-------
 python/paddle/v2/fluid/layers/ops.py          |  4 +--
 python/paddle/v2/fluid/tests/test_registry.py | 32 ++++++++-----------
 5 files changed, 43 insertions(+), 41 deletions(-)
 rename python/paddle/v2/fluid/{registry.py => layers/layer_function_generator.py} (93%)

diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py
index e72b22c83f..f333c3c26e 100644
--- a/python/paddle/v2/fluid/layers/control_flow.py
+++ b/python/paddle/v2/fluid/layers/control_flow.py
@@ -11,12 +11,13 @@
 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #See the License for the specific language governing permissions and
 #limitations under the License.
-from ..layer_helper import LayerHelper, unique_name
-from ..framework import Program, Variable, Operator
-from .. import core
-from tensor import assign, fill_constant
 import contextlib
-from ..registry import autodoc
+
+from layer_function_generator import autodoc
+from tensor import assign, fill_constant
+from .. import core
+from ..framework import Program, Variable, Operator
+from ..layer_helper import LayerHelper, unique_name

 __all__ = [
     'split_lod_tensor', 'merge_lod_tensor', 'BlockGuard',
@@ -1457,7 +1458,7 @@ class DynamicRNN(object):
                     method))


-@autodoc
+@autodoc()
 def reorder_lod_tensor_by_rank(x, rank_table):
     helper = LayerHelper('reorder_lod_tensor_by_rank', **locals())
     helper.is_instance('x', Variable)
diff --git a/python/paddle/v2/fluid/layers/device.py b/python/paddle/v2/fluid/layers/device.py
index ef74b2b2f0..b586a213ca 100644
--- a/python/paddle/v2/fluid/layers/device.py
+++ b/python/paddle/v2/fluid/layers/device.py
@@ -15,14 +15,14 @@
 All util layers.
 """

-from ..layer_helper import LayerHelper
+from layer_function_generator import autodoc
 from ..framework import unique_name
-from ..registry import autodoc
+from ..layer_helper import LayerHelper

 __all__ = ['get_places']


-@autodoc
+@autodoc()
 def get_places(device_count=None, device_type=None):
     helper = LayerHelper('get_places', **locals())
     out_places = helper.create_variable(name=unique_name(helper.name + ".out"))
diff --git a/python/paddle/v2/fluid/registry.py b/python/paddle/v2/fluid/layers/layer_function_generator.py
similarity index 93%
rename from python/paddle/v2/fluid/registry.py
rename to python/paddle/v2/fluid/layers/layer_function_generator.py
index 6c0c3a3518..0c4cda86a3 100644
--- a/python/paddle/v2/fluid/registry.py
+++ b/python/paddle/v2/fluid/layers/layer_function_generator.py
@@ -11,19 +11,21 @@
 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
#See the License for the specific language governing permissions and #limitations under the License. -import re import cStringIO -import warnings import functools -import inspect +import re +import warnings -import proto.framework_pb2 as framework_pb2 -from framework import OpProtoHolder, Variable, Program, Operator -from paddle.v2.fluid.layer_helper import LayerHelper, unique_name +from .. import proto + +framework_pb2 = proto.framework_pb2 + +from ..framework import OpProtoHolder, Variable +from ..layer_helper import LayerHelper __all__ = [ 'deprecated', - 'register_layer', + 'generate_layer_fn', 'autodoc', ] @@ -96,7 +98,7 @@ def _generate_doc_string_(op_proto): return buf.getvalue() -def register_layer(op_type): +def generate_layer_fn(op_type): """Register the Python layer for an Operator. Args: @@ -202,7 +204,10 @@ def deprecated(func_or_class): return func_wrapper -def autodoc(func): - func.__doc__ = _generate_doc_string_(OpProtoHolder.instance().get_op_proto( - func.__name__)) - return func +def autodoc(comment=""): + def __impl__(func): + func.__doc__ = _generate_doc_string_(OpProtoHolder.instance( + ).get_op_proto(func.__name__)) + comment + return func + + return __impl__ diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index 21945edf08..24f6b7294e 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -11,7 +11,7 @@ #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #See the License for the specific language governing permissions and #limitations under the License. -from ..registry import register_layer +from layer_function_generator import generate_layer_fn __activations__ = [ 'sigmoid', @@ -62,4 +62,4 @@ __all__ = [ ] + __activations__ for _OP in set(__all__): - globals()[_OP] = register_layer(_OP) + globals()[_OP] = generate_layer_fn(_OP) diff --git a/python/paddle/v2/fluid/tests/test_registry.py b/python/paddle/v2/fluid/tests/test_registry.py index dba1189630..b1749a76f0 100644 --- a/python/paddle/v2/fluid/tests/test_registry.py +++ b/python/paddle/v2/fluid/tests/test_registry.py @@ -1,35 +1,31 @@ # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import unittest -import warnings import paddle.v2.fluid as fluid -import paddle.v2.fluid.framework as framework -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.registry as registry +import numpy as np +import decorators class TestRegistry(unittest.TestCase): + @decorators.prog_scope() def test_registry_layer(self): - self.layer_type = "mean" - program = framework.Program() - x = fluid.layers.data(name='X', shape=[10, 10], dtype='float32') - output = layers.mean(x) + output = fluid.layers.mean(x=x) + place = fluid.CPUPlace() exe = fluid.Executor(place) - X = np.random.random((10, 10)).astype("float32") - mean_out = exe.run(program, feed={"X": X}, fetch_list=[output]) + mean_out = exe.run(feed={"X": X}, fetch_list=[output]) self.assertAlmostEqual(np.mean(X), mean_out) From 9a97c7f745b0014504f9c52897b72fd58147e70a Mon Sep 17 00:00:00 2001 From: ying Date: Wed, 17 Jan 2018 20:09:44 +0800 Subject: [PATCH 15/22] add wmt16 into dataset. --- python/paddle/v2/dataset/__init__.py | 16 +- python/paddle/v2/dataset/common.py | 21 +- python/paddle/v2/dataset/tests/wmt16_test.py | 66 ++++ python/paddle/v2/dataset/wmt14.py | 18 +- python/paddle/v2/dataset/wmt16.py | 348 ++++++++++++++++++ python/paddle/v2/fluid/layers/control_flow.py | 33 +- 6 files changed, 482 insertions(+), 20 deletions(-) create mode 100644 python/paddle/v2/dataset/tests/wmt16_test.py create mode 100644 python/paddle/v2/dataset/wmt16.py diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index 90830515c1..c1acbecd9c 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -24,11 +24,23 @@ import conll05 import uci_housing import sentiment import wmt14 +import wmt16 import mq2007 import flowers import voc2012 __all__ = [ - 'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment' - 'uci_housing', 'wmt14', 'mq2007', 'flowers', 'voc2012' + 'mnist', + 'imikolov', + 'imdb', + 'cifar', + 'movielens', + 'conll05', + 'sentiment' + 'uci_housing', + 'wmt14', + 'wmt16', + 'mq2007', + 'flowers', + 'voc2012', ] diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index fab8a68b0b..9aba35a648 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -25,8 +25,12 @@ import glob import cPickle as pickle __all__ = [ - 'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader', - 'convert' + 'DATA_HOME', + 'download', + 'md5file', + 'split', + 'cluster_files_reader', + 'convert', ] DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') @@ -58,12 +62,15 @@ def md5file(fname): return hash_md5.hexdigest() -def download(url, module_name, md5sum): +def download(url, module_name, md5sum, save_name=None): dirname = os.path.join(DATA_HOME, module_name) if not os.path.exists(dirname): os.makedirs(dirname) - filename = os.path.join(dirname, url.split('/')[-1]) + filename = os.path.join(dirname, + url.split('/')[-1] + if save_name is None else save_name) + retry = 0 retry_limit = 3 while not (os.path.exists(filename) and md5file(filename) == md5sum): @@ -196,9 +203,11 @@ def convert(output_path, reader, line_count, name_prefix): Convert data from reader to recordio format files. :param output_path: directory in which output files will be saved. - :param reader: a data reader, from which the convert program will read data instances. + :param reader: a data reader, from which the convert program will read + data instances. :param name_prefix: the name prefix of generated files. 
-    :param max_lines_to_shuffle: the max lines numbers to shuffle before writing.
+    :param max_lines_to_shuffle: the max number of lines to shuffle before
+                                 writing.
     """
     assert line_count >= 1
diff --git a/python/paddle/v2/dataset/tests/wmt16_test.py b/python/paddle/v2/dataset/tests/wmt16_test.py
new file mode 100644
index 0000000000..cef6c3216e
--- /dev/null
+++ b/python/paddle/v2/dataset/tests/wmt16_test.py
@@ -0,0 +1,66 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.v2.dataset.wmt16
import unittest


class TestWMT16(unittest.TestCase):
    def checkout_one_sample(self, sample):
        # train data has 3 fields: source language word indices,
        # target language word indices, and target next word indices.
        self.assertEqual(len(sample), 3)

        # test start mark and end mark in source word indices.
        self.assertEqual(sample[0][0], 0)
        self.assertEqual(sample[0][-1], 1)

        # test start mark in target word indices
        self.assertEqual(sample[1][0], 0)

        # test end mark in target next word indices
        self.assertEqual(sample[2][-1], 1)

    def test_train(self):
        for idx, sample in enumerate(
                paddle.v2.dataset.wmt16.train(
                    src_dict_size=100000, trg_dict_size=100000)()):
            if idx >= 10: break
            self.checkout_one_sample(sample)

    def test_test(self):
        for idx, sample in enumerate(
                paddle.v2.dataset.wmt16.test(
                    src_dict_size=1000, trg_dict_size=1000)()):
            if idx >= 10: break
            self.checkout_one_sample(sample)

    def test_val(self):
        for idx, sample in enumerate(
                paddle.v2.dataset.wmt16.validation(
                    src_dict_size=1000, trg_dict_size=1000)()):
            if idx >= 10: break
            self.checkout_one_sample(sample)

    def test_get_dict(self):
        dict_size = 1000
        word_dict = paddle.v2.dataset.wmt16.get_dict("en", dict_size, True)
        self.assertEqual(len(word_dict), dict_size)
        self.assertEqual(word_dict[0], "<s>")
        self.assertEqual(word_dict[1], "<e>")
        self.assertEqual(word_dict[2], "<unk>")


if __name__ == "__main__":
    unittest.main()
diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py
index 95a35d97ce..1e54a4999b 100644
--- a/python/paddle/v2/dataset/wmt14.py
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -25,12 +25,20 @@ import gzip
 import paddle.v2.dataset.common
 from paddle.v2.parameters import Parameters

-__all__ = ['train', 'test', 'build_dict', 'convert']
-
-URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
+__all__ = [
+    'train',
+    'test',
+    'get_dict',
+    'convert',
+]
+
+URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/'
+                'cslm_joint_paper/data/dev+test.tgz')
 MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
-# this is a small set of data for test. The original data is too large and will be add later.
-URL_TRAIN = 'http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz'
+# this is a small set of data for test. The original data is too large and
+# will be added later.
+URL_TRAIN = ('http://paddlepaddle.cdn.bcebos.com/demo/'
+             'wmt_shrinked_data/wmt14.tgz')
 MD5_TRAIN = '0791583d57d5beb693b9414c5b36798c'
 # BLEU of this trained model is 26.92
 URL_MODEL = 'http://paddlepaddle.bj.bcebos.com/demo/wmt_14/wmt14_model.tar.gz'
diff --git a/python/paddle/v2/dataset/wmt16.py b/python/paddle/v2/dataset/wmt16.py
new file mode 100644
index 0000000000..a1899f20b5
--- /dev/null
+++ b/python/paddle/v2/dataset/wmt16.py
@@ -0,0 +1,348 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
ACL2016 Multimodal Machine Translation. Please see this website for more
details: http://www.statmt.org/wmt16/multimodal-task.html#task1

If you use the dataset created for your task, please cite the following paper:
Multi30K: Multilingual English-German Image Descriptions.

@article{elliott-EtAl:2016:VL16,
 author    = {{Elliott}, D. and {Frank}, S. and {Sima'an}, K. and {Specia}, L.},
 title     = {Multi30K: Multilingual English-German Image Descriptions},
 booktitle = {Proceedings of the 6th Workshop on Vision and Language},
 year      = {2016},
 pages     = {70--74}
}
"""

import os
import tarfile
import gzip
from collections import defaultdict

import paddle.v2.dataset.common

__all__ = [
    "train",
    "test",
    "validation",
    "convert",
    "fetch",
    "get_dict",
]

DATA_URL = ("http://cloud.dlnel.org/filepub/"
            "?uuid=46a0808e-ddd8-427c-bacd-0dbc6d045fed")
DATA_MD5 = "0c38be43600334966403524a40dcd81e"

TOTAL_EN_WORDS = 11250
TOTAL_DE_WORDS = 19220

START_MARK = "<s>"
END_MARK = "<e>"
UNK_MARK = "<unk>"


def __build_dict__(tar_file, dict_size, save_path, lang):
    word_dict = defaultdict(int)
    with tarfile.open(tar_file, mode="r") as f:
        for line in f.extractfile("wmt16/train"):
            line_split = line.strip().split("\t")
            if len(line_split) != 2: continue
            sen = line_split[0] if lang == "en" else line_split[1]
            for w in sen.split():
                word_dict[w] += 1

    with open(save_path, "w") as fout:
        fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))
        for idx, word in enumerate(
                sorted(
                    word_dict.iteritems(), key=lambda x: x[1], reverse=True)):
            if idx + 3 == dict_size: break
            fout.write("%s\n" % (word[0]))


def __load_dict__(tar_file, dict_size, lang, reverse=False):
    dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME,
                             "wmt16/%s_%d.dict" % (lang, dict_size))
    if not os.path.exists(dict_path) or (
            len(open(dict_path, "r").readlines()) != dict_size):
        __build_dict__(tar_file, dict_size, dict_path, lang)

    word_dict = {}
    with open(dict_path, "r") as fdict:
        for idx, line in enumerate(fdict):
            if reverse:
                word_dict[idx] = line.strip()
            else:
                word_dict[line.strip()] = idx
    return word_dict


def __get_dict_size__(src_dict_size, trg_dict_size, src_lang):
    src_dict_size = min(src_dict_size, (TOTAL_EN_WORDS if src_lang == "en" else
                                        TOTAL_DE_WORDS))
    trg_dict_size = min(trg_dict_size, (TOTAL_DE_WORDS if src_lang == "en" else
                                        TOTAL_EN_WORDS))
    return src_dict_size, trg_dict_size


def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size,
                   src_lang):
    def reader():
        src_dict = __load_dict__(tar_file, src_dict_size, src_lang)
        trg_dict = __load_dict__(tar_file, trg_dict_size,
                                 ("de" if src_lang == "en" else "en"))

        # the indices for start mark, end mark, and unk are the same in source
        # language and target language. Here uses the source language
        # dictionary to determine their indices.
        start_id = src_dict[START_MARK]
        end_id = src_dict[END_MARK]
        unk_id = src_dict[UNK_MARK]

        src_col = 0 if src_lang == "en" else 1
        trg_col = 1 - src_col

        with tarfile.open(tar_file, mode="r") as f:
            for line in f.extractfile(file_name):
                line_split = line.strip().split("\t")
                if len(line_split) != 2:
                    continue
                src_words = line_split[src_col].split()
                src_ids = [start_id] + [
                    src_dict.get(w, unk_id) for w in src_words
                ] + [end_id]

                trg_words = line_split[trg_col].split()
                trg_ids = [trg_dict.get(w, unk_id) for w in trg_words]

                trg_ids_next = trg_ids + [end_id]
                trg_ids = [start_id] + trg_ids

                yield src_ids, trg_ids, trg_ids_next

    return reader


def train(src_dict_size, trg_dict_size, src_lang="en"):
    """
    WMT16 train set reader.

    This function returns the reader for train data. Each sample the reader
    returns is made up of three fields: the source language word index
    sequence, target language word index sequence and next word index
    sequence.

    NOTE:
        The original link for training data is:
        http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/training.tar.gz

        paddle.dataset.wmt16 provides a tokenized version of the original
        dataset by using moses's tokenization script:
        https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl

    Args:
        src_dict_size(int): Size of the source language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        trg_dict_size(int): Size of the target language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        src_lang(string): A string indicating which language is the source
                          language. Available options are: "en" for English
                          and "de" for German.

    Returns:
        callable: The train reader.
    """

    assert src_lang in ["en", "de"], ("An invalid language type. "
                                      "Only support: en (for English); "
                                      "de (for German)")
    src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size,
                                                     trg_dict_size, src_lang)

    return reader_creator(
        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
                                                   "wmt16.tar.gz"),
        file_name="wmt16/train",
        src_dict_size=src_dict_size,
        trg_dict_size=trg_dict_size,
        src_lang=src_lang)


def test(src_dict_size, trg_dict_size, src_lang="en"):
    """
    WMT16 test set reader.

    This function returns the reader for test data. Each sample the reader
    returns is made up of three fields: the source language word index
    sequence, target language word index sequence and next word index
    sequence.

    NOTE:
        The original link for test data is:
        http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/mmt16_task1_test.tar.gz

        paddle.dataset.wmt16 provides a tokenized version of the original
        dataset by using moses's tokenization script:
        https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl

    Args:
        src_dict_size(int): Size of the source language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        trg_dict_size(int): Size of the target language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        src_lang(string): A string indicating which language is the source
                          language. Available options are: "en" for English
                          and "de" for German.

    Returns:
        callable: The test reader.
    """

    assert src_lang in ["en", "de"], ("An invalid language type. "
                                      "Only support: en (for English); "
                                      "de (for German)")

    src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size,
                                                     trg_dict_size, src_lang)

    return reader_creator(
        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
                                                   "wmt16.tar.gz"),
        file_name="wmt16/test",
        src_dict_size=src_dict_size,
        trg_dict_size=trg_dict_size,
        src_lang=src_lang)


def validation(src_dict_size, trg_dict_size, src_lang="en"):
    """
    WMT16 validation set reader.

    This function returns the reader for validation data. Each sample the
    reader returns is made up of three fields: the source language word index
    sequence, target language word index sequence and next word index
    sequence.

    NOTE:
        The original link for validation data is:
        http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz

        paddle.dataset.wmt16 provides a tokenized version of the original
        dataset by using moses's tokenization script:
        https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl

    Args:
        src_dict_size(int): Size of the source language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        trg_dict_size(int): Size of the target language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        src_lang(string): A string indicating which language is the source
                          language. Available options are: "en" for English
                          and "de" for German.

    Returns:
        callable: The validation reader.
    """
    assert src_lang in ["en", "de"], ("An invalid language type. "
                                      "Only support: en (for English); "
                                      "de (for German)")
    src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size,
                                                     trg_dict_size, src_lang)

    return reader_creator(
        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
                                                   "wmt16.tar.gz"),
        file_name="wmt16/val",
        src_dict_size=src_dict_size,
        trg_dict_size=trg_dict_size,
        src_lang=src_lang)


def get_dict(lang, dict_size, reverse=False):
    """
    return the word dictionary for the specified language.

    Args:
        lang(string): A string indicating which language is the source
                      language. Available options are: "en" for English
                      and "de" for German.
        dict_size(int): Size of the specified language dictionary.
        reverse(bool): If reverse is set to False, the returned python
                       dictionary will use word as key and use index as value.
                       If reverse is set to True, the returned python
                       dictionary will use index as key and word as value.

    Returns:
        dict: The word dictionary for the specific language.
    """

    if lang == "en": dict_size = min(dict_size, TOTAL_EN_WORDS)
    else: dict_size = min(dict_size, TOTAL_DE_WORDS)

    dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME,
                             "wmt16/%s_%d.dict" % (lang, dict_size))
    assert os.path.exists(dict_path), ("Word dictionary does not exist. "
" + "Please invoke paddle.dataset.wmt16.train/test/validation " + "first to build the dictionary.") + tar_file = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16.tar.gz") + return __load_dict__(tar_file, dict_size, lang, reverse) + + +def fetch(): + """download the entire dataset. + """ + paddle.v4.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, + "wmt16.tar.gz") + + +def convert(path, src_dict_size, trg_dict_size, src_lang): + """Converts dataset to recordio format. + """ + + paddle.v2.dataset.common.convert( + path, + train( + src_dict_size=src_dict_size, + trg_dict_size=trg_dict_size, + src_lang=src_lang), + 1000, + "wmt16_train") + paddle.v2.dataset.common.convert( + path, + test( + src_dict_size=src_dict_size, + trg_dict_size=trg_dict_size, + src_lang=src_lang), + 1000, + "wmt16_test") + paddle.v2.dataset.common.convert( + path, + validation( + src_dict_size=src_dict_size, + trg_dict_size=trg_dict_size, + src_lang=src_lang), + 1000, + "wmt16_validation") diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py index e72b22c83f..b2183ebda1 100644 --- a/python/paddle/v2/fluid/layers/control_flow.py +++ b/python/paddle/v2/fluid/layers/control_flow.py @@ -19,13 +19,32 @@ import contextlib from ..registry import autodoc __all__ = [ - 'split_lod_tensor', 'merge_lod_tensor', 'BlockGuard', - 'BlockGuardWithCompletion', 'StaticRNNMemoryLink', 'WhileGuard', 'While', - 'lod_rank_table', 'max_sequence_len', 'topk', 'lod_tensor_to_array', - 'array_to_lod_tensor', 'increment', 'array_write', 'create_array', - 'less_than', 'array_read', 'shrink_memory', 'array_length', 'IfElse', - 'DynamicRNN', 'ConditionalBlock', 'StaticRNN', 'reorder_lod_tensor_by_rank', - 'ParallelDo', 'Print' + 'split_lod_tensor', + 'merge_lod_tensor', + 'BlockGuard', + 'BlockGuardWithCompletion', + 'StaticRNNMemoryLink', + 'WhileGuard', + 'While', + 'lod_rank_table', + 'max_sequence_len', + 'topk', + 'lod_tensor_to_array', + 'array_to_lod_tensor', + 'increment', + 'array_write', + 'create_array', + 'less_than', + 'array_read', + 'shrink_memory', + 'array_length', + 'IfElse', + 'DynamicRNN', + 'ConditionalBlock', + 'StaticRNN', + 'reorder_lod_tensor_by_rank', + 'ParallelDo', + 'Print', ] From 19c554f9e4ef5c96e47f65efd44e2524417e38d7 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 19 Jan 2018 19:19:35 +0800 Subject: [PATCH 16/22] update --- python/paddle/v2/fluid/clip.py | 82 +++++++++---------- .../v2/fluid/tests/test_gradient_clip.py | 44 +++++----- 2 files changed, 59 insertions(+), 67 deletions(-) diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py index d97cd9ecc9..fb0907c9f4 100644 --- a/python/paddle/v2/fluid/clip.py +++ b/python/paddle/v2/fluid/clip.py @@ -112,58 +112,52 @@ class GradientClipByNorm(BaseGradientClipAttr): class GradientClipByGlobalNorm(BaseGradientClipAttr): - global_norm_var = None - local_norm_var = None - clip_norm_var = None - scale_var = None - - @classmethod - def init(cls, clip_norm): - if not (isinstance(clip_norm, int) or isinstance(clip_norm, float)): - raise TypeError("The 'clip_norm' must be a value of int or float") - - cls.global_norm_var = layers.fill_constant( - shape=[1], dtype="float32", value=0.0) - cls.local_norm_var = layers.create_tensor(dtype="float32") - cls.clip_norm_var = layers.fill_constant( - shape=[1], dtype="float32", value=clip_norm) - - @classmethod - def check_init(cls): - if not (isinstance(cls.global_norm_var, framework.Variable) and - isinstance(cls.local_norm_var, 
framework.Variable) and - isinstance(cls.clip_norm_var, framework.Variable)): - raise ValueError( - "Class 'GradientClipByGlobalNorm' has not been properly initialized. \ - Please call GradientClipByGlobalNorm.init() first.") + def __init__(self, clip_norm, group_name="default_group"): + if not isinstance(group_name, basestring): + raise TypeError("'group_name' must be a basestring.") + + self.clip_norm = clip_norm + self.group_name = group_name def process_context(self, context, param, grad): - cls = self.__class__ - cls.check_init() + if self.group_name not in context: + context[self.group_name] = [] + context[self.group_name + "_clip_value"] = self.clip_norm + context[self.group_name + "_clip"] = layers.fill_constant( + shape=[1], dtype="float32", value=self.clip_norm) + else: + if not self.clip_norm == context[self.group_name + "_clip_value"]: + raise ValueError( + "All parameters' 'clip_norm' of a same group should be the same" + ) - cls.local_norm_var = layers.reduce_sum( - input=layers.pow(x=grad, factor=2.0)) - layers.sums( - input=[cls.local_norm_var, cls.global_norm_var], - out=[cls.global_norm_var]) + local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0)) + context[self.group_name].append(local_norm_var) - def create_operators(self, param, grad): - cls = self.__class__ - cls.check_init() + self.context = context - if cls.scale_var is None: - layers.sqrt(x=cls.global_norm_var, out=cls.global_norm_var) - cls.scale_var = layers.elementwise_div( - x=cls.clip_norm_var, + def create_operators(self, param, grad): + group_scale_name = self.group_name + "_scale" + if group_scale_name not in self.context: + group_norm_var = layers.sums(input=self.context[self.group_name]) + layers.sqrt(x=group_norm_var, out=group_norm_var) + clip_var = self.context[self.group_name + "_clip"] + group_scale_var = layers.elementwise_div( + x=clip_var, y=layers.elementwise_max( - x=cls.clip_norm_var, y=cls.global_norm_var)) - assert cls.scale_var.shape == (1L, ) + x=clip_var, y=group_norm_var)) + assert group_scale_var.shape == (1L, ) + self.context[group_scale_name] = group_scale_var - new_grad = layers.elementwise_mul(x=grad, y=cls.scale_var) + new_grad = layers.elementwise_mul( + x=grad, y=self.context[group_scale_name]) return param, new_grad -def gradient_clip_by_global_norm(clip_norm, param_list=None, program=None): +def gradient_clip_by_global_norm(clip_norm, + param_list=None, + group_name="default_group", + program=None): if program is None: program = framework.default_main_program() if param_list is None: @@ -175,9 +169,9 @@ def gradient_clip_by_global_norm(clip_norm, param_list=None, program=None): "'param_list' should be a list of Parameter or basestring(parameter's name)." 
) - GradientClipByGlobalNorm.init(clip_norm) for param in param_list: - param.gradient_clip_attr = GradientClipByGlobalNorm() + param.gradient_clip_attr = GradientClipByGlobalNorm(clip_norm, + group_name) def append_gradient_clip_ops(param_grad): diff --git a/python/paddle/v2/fluid/tests/test_gradient_clip.py b/python/paddle/v2/fluid/tests/test_gradient_clip.py index 4fb7f0b2cb..75c5fd9892 100644 --- a/python/paddle/v2/fluid/tests/test_gradient_clip.py +++ b/python/paddle/v2/fluid/tests/test_gradient_clip.py @@ -15,21 +15,10 @@ import numpy as np import paddle.v2 as paddle import paddle.v2.fluid as fluid - -def _get_global_param_norm_(params_grads): - res = fluid.layers.fill_constant(shape=[1], dtype="float32", value=0.0) - for _, grad in params_grads: - norm_var = fluid.layers.reduce_sum( - input=fluid.layers.pow(x=grad, factor=2.0)) - fluid.layers.sums(input=[norm_var, res], out=[res]) - fluid.layers.sqrt(x=res, out=res) - return res - - BATCH_SIZE = 128 -CLIP = 0.5 -prog = fluid.framework.Program() +CLIP = 1 +prog = fluid.framework.Program() with fluid.program_guard(main_program=prog): image = fluid.layers.data(name='x', shape=[784], dtype='float32') @@ -49,13 +38,12 @@ avg_cost_clip = prog_clip.block(0).var(avg_cost.name) p_g = fluid.backward.append_backward(loss=avg_cost) p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip) -with fluid.program_guard(main_program=prog): - gloabl_norm = _get_global_param_norm_(p_g) - with fluid.program_guard(main_program=prog_clip): fluid.clip.gradient_clip_by_global_norm(clip_norm=CLIP) p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip) - gloabl_norm_clip = _get_global_param_norm_(p_g_clip) + +grad_list = [elem[1] for elem in p_g] +grad_clip_list = [elem[1] for elem in p_g_clip] train_reader = paddle.batch( paddle.reader.shuffle( @@ -72,11 +60,21 @@ for data in train_reader(): count += 1 if count > 5: break - out, = exe.run(prog, feed=feeder.feed(data), fetch_list=[gloabl_norm]) - out_clip, = exe.run(prog_clip, - feed=feeder.feed(data), - fetch_list=[gloabl_norm_clip]) - - if not np.allclose(out_clip, np.minimum(out, np.array([CLIP]))): + out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list) + out_clip = exe.run(prog_clip, + feed=feeder.feed(data), + fetch_list=grad_clip_list) + global_norm = 0 + for v in out[1:]: + global_norm += np.sum(np.power(v, 2)) + global_norm = np.sqrt(global_norm) + + global_norm_clip = 0 + for v in out_clip[1:]: + global_norm_clip += np.sum(np.power(v, 2)) + global_norm_clip = np.sqrt(global_norm_clip) + + if not np.isclose( + a=global_norm_clip, b=np.minimum(global_norm, CLIP), rtol=5e-3): exit(1) exit(0) From 2f344e7f0f85f65ce0f5867ac5985d5576d07395 Mon Sep 17 00:00:00 2001 From: ying Date: Mon, 22 Jan 2018 10:22:34 +0800 Subject: [PATCH 17/22] fix name convention. 
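Helpers such as __read_to_dict__ and __load_dict__ were spelled with both
leading and trailing double underscores, the "dunder" form that PEP 8
reserves for Python's own protocol names (__init__, __len__, ...) and asks
users never to invent. Module-private helpers keep leading underscores
only, so the trailing pair is dropped. A minimal sketch of the convention
(the identifiers below are illustrative, not taken from this patch):

    def __load_helper(path):
        # leading underscores only: a module-private helper
        pass

    class Reader(object):
        def __len__(self):
            # leading AND trailing double underscores are reserved for
            # interpreter-defined protocol methods, never for helpers
            return 0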
--- python/paddle/v2/dataset/wmt14.py | 12 ++++++------ python/paddle/v2/dataset/wmt16.py | 32 +++++++++++++++---------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py index 1e54a4999b..5104e29051 100644 --- a/python/paddle/v2/dataset/wmt14.py +++ b/python/paddle/v2/dataset/wmt14.py @@ -50,8 +50,8 @@ UNK = "" UNK_IDX = 2 -def __read_to_dict__(tar_file, dict_size): - def __to_dict__(fd, size): +def __read_to_dict(tar_file, dict_size): + def __to_dict(fd, size): out_dict = dict() for line_count, line in enumerate(fd): if line_count < size: @@ -66,19 +66,19 @@ def __read_to_dict__(tar_file, dict_size): if each_item.name.endswith("src.dict") ] assert len(names) == 1 - src_dict = __to_dict__(f.extractfile(names[0]), dict_size) + src_dict = __to_dict(f.extractfile(names[0]), dict_size) names = [ each_item.name for each_item in f if each_item.name.endswith("trg.dict") ] assert len(names) == 1 - trg_dict = __to_dict__(f.extractfile(names[0]), dict_size) + trg_dict = __to_dict(f.extractfile(names[0]), dict_size) return src_dict, trg_dict def reader_creator(tar_file, file_name, dict_size): def reader(): - src_dict, trg_dict = __read_to_dict__(tar_file, dict_size) + src_dict, trg_dict = __read_to_dict(tar_file, dict_size) with tarfile.open(tar_file, mode='r') as f: names = [ each_item.name for each_item in f @@ -160,7 +160,7 @@ def get_dict(dict_size, reverse=True): # if reverse = False, return dict = {'a':'001', 'b':'002', ...} # else reverse = true, return dict = {'001':'a', '002':'b', ...} tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) - src_dict, trg_dict = __read_to_dict__(tar_file, dict_size) + src_dict, trg_dict = __read_to_dict(tar_file, dict_size) if reverse: src_dict = {v: k for k, v in src_dict.items()} trg_dict = {v: k for k, v in trg_dict.items()} diff --git a/python/paddle/v2/dataset/wmt16.py b/python/paddle/v2/dataset/wmt16.py index a1899f20b5..bbc28a2da9 100644 --- a/python/paddle/v2/dataset/wmt16.py +++ b/python/paddle/v2/dataset/wmt16.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -ACL2016 Multimodal Machine Translation. Please see this websit for more details: -http://www.statmt.org/wmt16/multimodal-task.html#task1 +ACL2016 Multimodal Machine Translation. Please see this website for more +details: http://www.statmt.org/wmt16/multimodal-task.html#task1 If you use the dataset created for your task, please cite the following paper: Multi30K: Multilingual English-German Image Descriptions. 
@@ -56,7 +56,7 @@ END_MARK = "" UNK_MARK = "" -def __build_dict__(tar_file, dict_size, save_path, lang): +def __build_dict(tar_file, dict_size, save_path, lang): word_dict = defaultdict(int) with tarfile.open(tar_file, mode="r") as f: for line in f.extractfile("wmt16/train"): @@ -75,12 +75,12 @@ def __build_dict__(tar_file, dict_size, save_path, lang): fout.write("%s\n" % (word[0])) -def __load_dict__(tar_file, dict_size, lang, reverse=False): +def __load_dict(tar_file, dict_size, lang, reverse=False): dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16/%s_%d.dict" % (lang, dict_size)) if not os.path.exists(dict_path) or ( len(open(dict_path, "r").readlines()) != dict_size): - __build_dict__(tar_file, dict_size, dict_path, lang) + __build_dict(tar_file, dict_size, dict_path, lang) word_dict = {} with open(dict_path, "r") as fdict: @@ -92,7 +92,7 @@ def __load_dict__(tar_file, dict_size, lang, reverse=False): return word_dict -def __get_dict_size__(src_dict_size, trg_dict_size, src_lang): +def __get_dict_size(src_dict_size, trg_dict_size, src_lang): src_dict_size = min(src_dict_size, (TOTAL_EN_WORDS if src_lang == "en" else TOTAL_DE_WORDS)) trg_dict_size = min(trg_dict_size, (TOTAL_DE_WORDS if src_lang == "en" else @@ -102,9 +102,9 @@ def __get_dict_size__(src_dict_size, trg_dict_size, src_lang): def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang): def reader(): - src_dict = __load_dict__(tar_file, src_dict_size, src_lang) - trg_dict = __load_dict__(tar_file, trg_dict_size, - ("de" if src_lang == "en" else "en")) + src_dict = __load_dict(tar_file, src_dict_size, src_lang) + trg_dict = __load_dict(tar_file, trg_dict_size, + ("de" if src_lang == "en" else "en")) # the indice for start mark, end mark, and unk are the same in source # language and target language. Here uses the source language @@ -173,8 +173,8 @@ def train(src_dict_size, trg_dict_size, src_lang="en"): assert (src_lang in ["en", "de"], ("An error language type. Only support: " "en (for English); de(for Germany)")) - src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size, - trg_dict_size, src_lang) + src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, + src_lang) return reader_creator( tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, @@ -222,8 +222,8 @@ def test(src_dict_size, trg_dict_size, src_lang="en"): ("An error language type. " "Only support: en (for English); de(for Germany)")) - src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size, - trg_dict_size, src_lang) + src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, + src_lang) return reader_creator( tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, @@ -269,8 +269,8 @@ def validation(src_dict_size, trg_dict_size, src_lang="en"): assert (src_lang in ["en", "de"], ("An error language type. 
" "Only support: en (for English); de(for Germany)")) - src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size, - trg_dict_size, src_lang) + src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, + src_lang) return reader_creator( tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, @@ -308,7 +308,7 @@ def get_dict(lang, dict_size, reverse=False): "Please invoke paddle.dataset.wmt16.train/test/validation " "first to build the dictionary.") tar_file = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16.tar.gz") - return __load_dict__(tar_file, dict_size, lang, reverse) + return __load_dict(tar_file, dict_size, lang, reverse) def fetch(): From 2024489bb85175ff77e27910b50f032e0b485325 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Mon, 22 Jan 2018 11:59:23 +0800 Subject: [PATCH 18/22] Fix CI --- paddle/operators/compare_op.h | 2 +- paddle/operators/elementwise_op_function.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/operators/compare_op.h b/paddle/operators/compare_op.h index 15e9cfcaab..9c655d6c0d 100644 --- a/paddle/operators/compare_op.h +++ b/paddle/operators/compare_op.h @@ -54,7 +54,7 @@ class CompareOpKernel public: void Compute(const framework::ExecutionContext& context) const override { using T = typename Functor::ELEM_TYPE; - ElementwiseComputeEx(context); + ElementwiseComputeEx(context); } }; diff --git a/paddle/operators/elementwise_op_function.h b/paddle/operators/elementwise_op_function.h index e6f3e39ece..d749b8e875 100644 --- a/paddle/operators/elementwise_op_function.h +++ b/paddle/operators/elementwise_op_function.h @@ -373,7 +373,7 @@ void ElementwiseComputeEx(const framework::ExecutionContext& ctx) { auto* x = ctx.Input("X"); auto* y = ctx.Input("Y"); auto* z = ctx.Output("Out"); - z->mutable_data(ctx.GetPlace()); + z->mutable_data(ctx.GetPlace()); TransformFunctor functor( x, y, z, ctx.template device_context(), Functor()); From d3d855fbc81149bea6050c3518b041583999424f Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Mon, 22 Jan 2018 12:22:27 +0800 Subject: [PATCH 19/22] Fix unittest --- python/paddle/v2/fluid/tests/test_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/fluid/tests/test_registry.py b/python/paddle/v2/fluid/tests/test_registry.py index 4fddb91268..44e50ca55a 100644 --- a/python/paddle/v2/fluid/tests/test_registry.py +++ b/python/paddle/v2/fluid/tests/test_registry.py @@ -28,4 +28,4 @@ class TestRegistry(unittest.TestCase): exe = fluid.Executor(place) X = np.random.random((10, 10)).astype("float32") mean_out = exe.run(feed={"X": X}, fetch_list=[output]) - self.assertAlmostEqual(np.mean(X), mean_out) + self.assertAlmostEqual(np.mean(X), mean_out[0]) From 5c26f60875a3ce9fcc86c74dcf6884aa4a01c966 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Mon, 22 Jan 2018 12:33:14 +0800 Subject: [PATCH 20/22] Fix license --- python/paddle/v2/fluid/layers/control_flow.py | 10 +++++----- python/paddle/v2/fluid/layers/ops.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py index 67c3b2e9ac..a7fc4f6539 100644 --- a/python/paddle/v2/fluid/layers/control_flow.py +++ b/python/paddle/v2/fluid/layers/control_flow.py @@ -6,11 +6,11 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES 
OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import contextlib from layer_function_generator import autodoc diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index 603de1f600..19dc0fdeee 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -6,11 +6,11 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from layer_function_generator import generate_layer_fn __activations__ = [ From e8adcaf27855e452dea5b2deaddb830363cd3964 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 22 Jan 2018 12:50:45 +0800 Subject: [PATCH 21/22] update --- python/paddle/v2/fluid/clip.py | 1 + python/paddle/v2/fluid/layers/ops.py | 1 + python/paddle/v2/fluid/param_attr.py | 1 + python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py | 1 + python/paddle/v2/fluid/tests/test_gradient_clip.py | 1 + 5 files changed, 5 insertions(+) diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py index 777a39e105..386df9823d 100644 --- a/python/paddle/v2/fluid/clip.py +++ b/python/paddle/v2/fluid/clip.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import functools import layers import framework diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index 7e52dc4c34..d296076162 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from ..registry import register_layer __activations__ = [ diff --git a/python/paddle/v2/fluid/param_attr.py b/python/paddle/v2/fluid/param_attr.py index 17fcb262ef..dcca8b6c54 100644 --- a/python/paddle/v2/fluid/param_attr.py +++ b/python/paddle/v2/fluid/param_attr.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ from initializer import Initializer, Xavier, Constant from regularizer import WeightDecayRegularizer diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index 2fde3707da..8776a65bf8 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from __future__ import print_function import numpy as np import paddle.v2 as paddle diff --git a/python/paddle/v2/fluid/tests/test_gradient_clip.py b/python/paddle/v2/fluid/tests/test_gradient_clip.py index 75c5fd9892..4e6e6a1ef6 100644 --- a/python/paddle/v2/fluid/tests/test_gradient_clip.py +++ b/python/paddle/v2/fluid/tests/test_gradient_clip.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import numpy as np import paddle.v2 as paddle import paddle.v2.fluid as fluid From a6da470b11dd803045dd1be6c99069a2def48198 Mon Sep 17 00:00:00 2001 From: QI JUN Date: Mon, 22 Jan 2018 14:21:44 +0800 Subject: [PATCH 22/22] add memory optimization transpiler demo (#7443) * add memory optimization transpiler demo * add memory benchmark compile option * add gflags instead of macro * refine code --- paddle/framework/executor.cc | 11 ++ paddle/framework/scope.cc | 12 +- python/paddle/v2/fluid/__init__.py | 4 +- python/paddle/v2/fluid/tests/CMakeLists.txt | 1 + .../book_memory_optimization/CMakeLists.txt | 11 ++ .../test_memopt_fit_a_line.py | 44 ++++++ .../test_memopt_image_classification_train.py | 133 ++++++++++++++++++ 7 files changed, 213 insertions(+), 3 deletions(-) create mode 100644 python/paddle/v2/fluid/tests/book_memory_optimization/CMakeLists.txt create mode 100644 python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py create mode 100644 python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index c0418c9266..1382bfca19 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -23,6 +23,7 @@ limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/platform/place.h" +DECLARE_bool(do_memory_benchmark); DEFINE_bool(check_nan_inf, false, "Checking whether operator produce NAN/INF or not. 
It will be " "extremely slow so please use this flag wisely."); @@ -117,6 +118,10 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); VLOG(3) << op->DebugStringEx(local_scope); op->Run(*local_scope, place_); + if (FLAGS_do_memory_benchmark) { + VLOG(2) << "Memory used after operator " + op->Type() + " running: " + << memory::memory_usage(place_); + } if (FLAGS_check_nan_inf) { for (auto& vname : op->OutputVars(true)) { auto* var = local_scope->FindVar(vname); @@ -130,6 +135,12 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, if (create_vars && create_local_scope) { scope->DeleteScope(local_scope); } + if (FLAGS_do_memory_benchmark) { + VLOG(2) << "-------------------------------------------------------"; + VLOG(2) << "Memory used after deleting local scope: " + << memory::memory_usage(place_); + VLOG(2) << "-------------------------------------------------------"; + } } } // namespace framework diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 2bd0ac8f5a..a67ff91009 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -20,6 +20,10 @@ limitations under the License. */ #include "paddle/framework/threadpool.h" #include "paddle/string/printf.h" +DEFINE_bool(do_memory_benchmark, false, + "Doing memory benchmark. It will make deleting scope synchronized, " + "and add some memory usage logs"); + namespace paddle { namespace framework { @@ -88,8 +92,12 @@ void Scope::DeleteScope(Scope* scope) { auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); this->kids_.erase(it); - // Make delete async. - Async([scope] { delete scope; }); + // When making memory benchmark on Fluid, we have to delete scope sync. 
+ if (FLAGS_do_memory_benchmark) { + delete scope; + } else { + Async([scope] { delete scope; }); + } } void Scope::Rename(const std::string& origin_name, diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py index e91eaa4f35..1f041c7459 100644 --- a/python/paddle/v2/fluid/__init__.py +++ b/python/paddle/v2/fluid/__init__.py @@ -86,7 +86,9 @@ def __bootstrap__(): os.environ['OMP_NUM_THREADS'] = str(num_threads) - read_env_flags = ['use_pinned_memory', 'check_nan_inf'] + read_env_flags = [ + 'use_pinned_memory', 'check_nan_inf', 'do_memory_benchmark' + ] if core.is_compile_gpu(): read_env_flags += ['fraction_of_gpu_memory_to_use', 'op_sync'] core.init_gflags([sys.argv[0]] + diff --git a/python/paddle/v2/fluid/tests/CMakeLists.txt b/python/paddle/v2/fluid/tests/CMakeLists.txt index 9a0240cbf6..8305316082 100644 --- a/python/paddle/v2/fluid/tests/CMakeLists.txt +++ b/python/paddle/v2/fluid/tests/CMakeLists.txt @@ -6,3 +6,4 @@ endforeach() add_subdirectory(book) add_subdirectory(book_distribute) +add_subdirectory(book_memory_optimization) diff --git a/python/paddle/v2/fluid/tests/book_memory_optimization/CMakeLists.txt b/python/paddle/v2/fluid/tests/book_memory_optimization/CMakeLists.txt new file mode 100644 index 0000000000..213af5d27f --- /dev/null +++ b/python/paddle/v2/fluid/tests/book_memory_optimization/CMakeLists.txt @@ -0,0 +1,11 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +list(REMOVE_ITEM TEST_OPS test_memopt_image_classification_train) +py_test(test_memopt_image_classification_train_resnet SRCS test_memopt_image_classification_train.py ARGS resnet) +py_test(test_memopt_image_classification_train_vgg SRCS test_memopt_image_classification_train.py ARGS vgg) + +# default test +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() diff --git a/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py new file mode 100644 index 0000000000..6206fcc4be --- /dev/null +++ b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py @@ -0,0 +1,44 @@ +import numpy as np +import paddle.v2 as paddle +import paddle.v2.fluid as fluid + +x = fluid.layers.data(name='x', shape=[13], dtype='float32') + +y_predict = fluid.layers.fc(input=x, size=1, act=None) + +y = fluid.layers.data(name='y', shape=[1], dtype='float32') + +cost = fluid.layers.square_error_cost(input=y_predict, label=y) +avg_cost = fluid.layers.mean(x=cost) + +sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) +sgd_optimizer.minimize(avg_cost) + +# memopt_program = fluid.default_main_program() +memopt_program = fluid.memory_optimize(fluid.default_main_program()) + +BATCH_SIZE = 200 + +train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.uci_housing.train(), buf_size=500), + batch_size=BATCH_SIZE) + +place = fluid.CPUPlace() +feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) +exe = fluid.Executor(place) + +exe.run(fluid.default_startup_program()) + +PASS_NUM = 100 +for pass_id in range(PASS_NUM): + fluid.io.save_persistables(exe, "./fit_a_line.model/") + fluid.io.load_persistables(exe, "./fit_a_line.model/") + for data in train_reader(): + avg_loss_value, = exe.run(memopt_program, + feed=feeder.feed(data), + fetch_list=[avg_cost]) + + if avg_loss_value[0] < 10.0: + exit(0) # if avg cost less than 10.0, we think our code is good. 
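+# Falling through both loops means the loss never dropped below the
+# threshold within PASS_NUM passes, so the demo reports failure.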
+exit(1) diff --git a/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py new file mode 100644 index 0000000000..cc37f773c4 --- /dev/null +++ b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py @@ -0,0 +1,133 @@ +from __future__ import print_function + +import sys + +import paddle.v2 as paddle +import paddle.v2.fluid as fluid + + +def resnet_cifar10(input, depth=32): + def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'): + tmp = fluid.layers.conv2d( + input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=False) + return fluid.layers.batch_norm(input=tmp, act=act) + + def shortcut(input, ch_in, ch_out, stride): + if ch_in != ch_out: + return conv_bn_layer(input, ch_out, 1, stride, 0, None) + else: + return input + + def basicblock(input, ch_in, ch_out, stride): + tmp = conv_bn_layer(input, ch_out, 3, stride, 1) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None) + short = shortcut(input, ch_in, ch_out, stride) + return fluid.layers.elementwise_add(x=tmp, y=short, act='relu') + + def layer_warp(block_func, input, ch_in, ch_out, count, stride): + tmp = block_func(input, ch_in, ch_out, stride) + for i in range(1, count): + tmp = block_func(tmp, ch_out, ch_out, 1) + return tmp + + assert (depth - 2) % 6 == 0 + n = (depth - 2) / 6 + conv1 = conv_bn_layer( + input=input, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) + res2 = layer_warp(basicblock, res1, 16, 32, n, 2) + res3 = layer_warp(basicblock, res2, 32, 64, n, 2) + pool = fluid.layers.pool2d( + input=res3, pool_size=8, pool_type='avg', pool_stride=1) + return pool + + +def vgg16_bn_drop(input): + def conv_block(input, num_filter, groups, dropouts): + return fluid.nets.img_conv_group( + input=input, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act='relu', + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type='max') + + conv1 = conv_block(input, 64, 2, [0.3, 0]) + conv2 = conv_block(conv1, 128, 2, [0.4, 0]) + conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) + conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) + conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) + + drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5) + fc1 = fluid.layers.fc(input=drop, size=512, act=None) + bn = fluid.layers.batch_norm(input=fc1, act='relu') + drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5) + fc2 = fluid.layers.fc(input=drop2, size=512, act=None) + return fc2 + + +classdim = 10 +data_shape = [3, 32, 32] + +images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') +label = fluid.layers.data(name='label', shape=[1], dtype='int64') + +net_type = "vgg" +if len(sys.argv) >= 2: + net_type = sys.argv[1] + +if net_type == "vgg": + print("train vgg net") + net = vgg16_bn_drop(images) +elif net_type == "resnet": + print("train resnet") + net = resnet_cifar10(images, 32) +else: + raise ValueError("%s network is not supported" % net_type) + +predict = fluid.layers.fc(input=net, size=classdim, act='softmax') +cost = fluid.layers.cross_entropy(input=predict, label=label) +avg_cost = fluid.layers.mean(x=cost) + +optimizer = fluid.optimizer.Adam(learning_rate=0.001) +opts = optimizer.minimize(avg_cost) + +accuracy = 
fluid.evaluator.Accuracy(input=predict, label=label) + +# memopt_program = fluid.default_main_program() +memopt_program = fluid.memory_optimize(fluid.default_main_program()) + +BATCH_SIZE = 128 +PASS_NUM = 1 + +train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + +place = fluid.CPUPlace() +exe = fluid.Executor(place) +feeder = fluid.DataFeeder(place=place, feed_list=[images, label]) +exe.run(fluid.default_startup_program()) + +for pass_id in range(PASS_NUM): + accuracy.reset(exe) + for data in train_reader(): + loss, acc = exe.run(memopt_program, + feed=feeder.feed(data), + fetch_list=[avg_cost] + accuracy.metrics) + pass_acc = accuracy.eval(exe) + print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str( + pass_acc)) + # this model is slow, so if we can train two mini-batches, we think it works properly. + exit(0) +exit(1)
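A minimal usage sketch for the new flag follows. The flag name and the
VLOG(2) log level come from the diffs above; the assumption is that, as
with Fluid's other bootstrap flags, gflags picks it up from a
FLAGS_-prefixed environment variable, which must therefore be set before
paddle.v2.fluid is first imported:

    import os

    # Both settings must precede the fluid import, because __bootstrap__()
    # consumes them when the module is first loaded.
    os.environ["FLAGS_do_memory_benchmark"] = "true"
    os.environ["GLOG_v"] = "2"  # per-operator memory logs use VLOG(2)

    import paddle.v2.fluid as fluid  # flags take effect here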