From d23ea4ef8ebc637534c5abafca995be257f83751 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 12 Jan 2018 16:47:57 +0800 Subject: [PATCH 01/22] add gradient clip by norm --- python/paddle/v2/fluid/clip.py | 12 ++++++++++++ python/paddle/v2/fluid/layers/ops.py | 1 + 2 files changed, 13 insertions(+) diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py index b1fd1c2b65..eb75018d77 100644 --- a/python/paddle/v2/fluid/clip.py +++ b/python/paddle/v2/fluid/clip.py @@ -77,6 +77,18 @@ class GradientClipByValue(BaseGradientClipAttr): return param, new_grad +class GradientClipByNorm(BaseGradientClipAttr): + def __init__(self, clip_norm): + self.clip_norm = clip_norm + + def process_context(self, context, p_g): + pass + + def create_operators(self, param, grad): + new_grad = layers.clip_by_norm(x=grad, max_norm=self.clip_norm) + return param, new_grad + + def append_gradient_clip_ops(param_grad): context = dict() create_op_callbacks = [] diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index d3a5b70785..884e84011d 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -16,6 +16,7 @@ __all__ = [ 'elementwise_sub', 'elementwise_mul', 'clip', + 'clip_by_norm', 'sequence_softmax', ] + __activations__ From adc26dffa9dac81bd93c88d70f0ab66fcdcc81f0 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 15 Jan 2018 10:36:09 +0800 Subject: [PATCH 02/22] developing GradientClipByGlobalNorm --- python/paddle/v2/fluid/clip.py | 54 ++++++++++++++++++++++++---- python/paddle/v2/fluid/layers/ops.py | 20 ++++------- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py index eb75018d77..f0904e18ea 100644 --- a/python/paddle/v2/fluid/clip.py +++ b/python/paddle/v2/fluid/clip.py @@ -1,5 +1,6 @@ import functools import layers +from framework import Variable from . 
import core

 __all__ = [
@@ -44,7 +45,7 @@ def error_clip_callback(block, context):


 class BaseGradientClipAttr(object):
-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         raise NotImplementedError()

     def create_operators(self, param, grad):
@@ -52,7 +53,7 @@ class BaseGradientClipAttr(object):


 class NullGradientClipAttr(BaseGradientClipAttr):
-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         pass

     def create_operators(self, param, grad):
@@ -69,7 +70,7 @@ class GradientClipByValue(BaseGradientClipAttr):
         self.max = max
         self.min = min

-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         pass

     def create_operators(self, param, grad):
@@ -81,7 +82,7 @@ class GradientClipByNorm(BaseGradientClipAttr):
     def __init__(self, clip_norm):
         self.clip_norm = clip_norm

-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         pass

     def create_operators(self, param, grad):
@@ -89,6 +90,46 @@ class GradientClipByNorm(BaseGradientClipAttr):
         return param, new_grad


+class GradientClipByGlobalNorm(BaseGradientClipAttr):
+    global_norm_var = None
+    clip_norm_var = None
+    ratio_var = None
+
+    @classmethod
+    def init(cls, clip_norm):
+        cls.global_norm_var = layers.fill_constant(
+            shape=[1], dtype="float32", value=0.0)
+        cls.clip_norm_var = layers.fill_constant(
+            shape=[1], dtype="float32", value=clip_norm)
+
+    def __init__(self):
+        if not (isinstance(self.__class__.global_norm_var, Variable) and
+                isinstance(self.__class__.clip_norm_var, Variable)):
+            raise ValueError(
+                "Class 'GradientClipByGlobalNorm' has not been properly initialized. Please call GradientClipByGlobalNorm.init() first."
+            )
+
+    def process_context(self, context, param, grad):
+        local_norm_var = layers.reduce_sum(
+            x=layers.pow(x=grad, factor=2), reduce_all=True)
+        layers.sums(
+            input=[local_norm_var, self.__class__.global_norm_var],
+            out=[self.__class__.global_norm_var])
+
+    def create_operators(self, param, grad):
+        if self.__class__.ratio_var is None:
+            self.__class__.global_norm_var = layers.sqrt(
+                x=self.__class__.global_norm_var)
+            self.__class__.ratio_var = layers.elementwise_div(
+                x=self.__class__.clip_norm_var,
+                y=layers.elementwise_max(
+                    x=self.__class__.clip_norm_var,
+                    y=self.__class__.global_norm_var))
+        # the elementwise_max op is still missing
+        # ratio_var cannot be fed to scale_op yet
+        # new_grad = layers.
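[Editor's note: the two TODO comments above record that, at this point in the series, the elementwise_max op and a way to feed ratio_var into scale_op were still missing. For orientation, the computation this class is building toward is the standard global-norm clipping rule. A minimal NumPy sketch of that rule (illustrative only, not part of the patch):

    import numpy as np

    def clip_by_global_norm(grads, clip_norm):
        # global_norm is the L2 norm of all gradients taken together
        global_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
        # ratio <= 1.0; gradients are left untouched when the norm is small
        ratio = clip_norm / max(clip_norm, global_norm)
        return [g * ratio for g in grads]

The fluid code expresses the same computation with the fill_constant, reduce_sum, sums, sqrt, elementwise_div and elementwise_max ops.]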
+
+
+
 def append_gradient_clip_ops(param_grad):
     context = dict()
     create_op_callbacks = []
@@ -98,10 +139,9 @@ def append_gradient_clip_ops(param_grad):
             clip_attr = NullGradientClipAttr()
         if not isinstance(clip_attr, BaseGradientClipAttr):
             raise TypeError(
-                "clip attribute should be an instance of BaseGradientClippingAttr"
-            )
+                "clip attribute should be an instance of BaseGradientClipAttr")

-        clip_attr.process_context(context=context, p_g=param_grad)
+        clip_attr.process_context(context=context, param=p, grad=g)
         create_op_callbacks.append(
             functools.partial(
                 clip_attr.create_operators, param=p, grad=g))
diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py
index 884e84011d..021b87828f 100644
--- a/python/paddle/v2/fluid/layers/ops.py
+++ b/python/paddle/v2/fluid/layers/ops.py
@@ -1,23 +1,15 @@
 from ..registry import register_layer

 __activations__ = [
-    'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round'
+    'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round',
+    'pow'
 ]

 __all__ = [
-    'mean',
-    'mul',
-    'reshape',
-    'scale',
-    'transpose',
-    'sigmoid_cross_entropy_with_logits',
-    'elementwise_add',
-    'elementwise_div',
-    'elementwise_sub',
-    'elementwise_mul',
-    'clip',
-    'clip_by_norm',
-    'sequence_softmax',
+    'mean', 'mul', 'reshape', 'scale', 'transpose',
+    'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div',
+    'elementwise_sub', 'elementwise_mul', 'clip', 'clip_by_norm',
+    'sequence_softmax', 'reduce_sum'
 ] + __activations__

 for _OP in set(__all__):

From f189ad74426cf0970bd05016d4a2827ea6c1ea00 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 17 Jan 2018 17:27:54 +0800
Subject: [PATCH 03/22] refine the definition of class GradientClipByGlobalNorm

---
 python/paddle/v2/fluid/clip.py | 47 +++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index f0904e18ea..fcdd4c29e4 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -93,41 +93,48 @@ class GradientClipByNorm(BaseGradientClipAttr):
 class GradientClipByGlobalNorm(BaseGradientClipAttr):
     global_norm_var = None
     clip_norm_var = None
-    ratio_var = None
+    scale_var = None

     @classmethod
     def init(cls, clip_norm):
+        if not (isinstance(clip_norm, int) or isinstance(clip_norm, float)):
+            raise TypeError("The 'clip_norm' must be a value of int or float")
+
         cls.global_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=0.0)
         cls.clip_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=clip_norm)

-    def __init__(self):
-        if not (isinstance(self.__class__.global_norm_var, Variable) and
-                isinstance(self.__class__.clip_norm_var, Variable)):
+    @classmethod
+    def check_init(cls):
+        if not (isinstance(cls.global_norm_var, Variable) and
+                isinstance(cls.clip_norm_var, Variable)):
             raise ValueError(
-                "Class 'GradientClipByGlobalNorm' has not been properly initialized. Please call GradientClipByGlobalNorm.init() first."
-            )
+                "Class 'GradientClipByGlobalNorm' has not been properly initialized. \
+                Please call GradientClipByGlobalNorm.init() first.")
+
+    @classmethod
+    def process_context(cls, context, param, grad):
+        cls.check_init()

-    def process_context(self, context, param, grad):
         local_norm_var = layers.reduce_sum(
             x=layers.pow(x=grad, factor=2), reduce_all=True)
         layers.sums(
-            input=[local_norm_var, self.__class__.global_norm_var],
-            out=[self.__class__.global_norm_var])
+            input=[local_norm_var, cls.global_norm_var],
+            out=[cls.global_norm_var])

-    def create_operators(self, param, grad):
-        if self.__class__.ratio_var is None:
-            self.__class__.global_norm_var = layers.sqrt(
-                x=self.__class__.global_norm_var)
-            self.__class__.ratio_var = layers.elementwise_div(
-                x=self.__class__.clip_norm_var,
+    @classmethod
+    def create_operators(cls, param, grad):
+        cls.check_init()
+
+        if cls.scale_var is None:
+            cls.global_norm_var = layers.sqrt(x=cls.global_norm_var)
+            cls.scale_var = layers.elementwise_div(
+                x=cls.clip_norm_var,
                 y=layers.elementwise_max(
-                    x=self.__class__.clip_norm_var,
-                    y=self.__class__.global_norm_var))
-        # the elementwise_max op is still missing
-        # ratio_var cannot be fed to scale_op yet
-        # new_grad = layers.
+                    x=cls.clip_norm_var, y=cls.global_norm_var))
+        new_grad = layers.elementwise_mul(x=grad, y=cls.scale_var)
+        return param, new_grad


 def append_gradient_clip_ops(param_grad):

From 4cb6e72b85fef0205a3d3ebfd136e11c009e39f6 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 17 Jan 2018 18:43:54 +0800
Subject: [PATCH 04/22] refine code details

---
 python/paddle/v2/fluid/clip.py                  | 18 +++++++--------
 python/paddle/v2/fluid/framework.py             |  2 +-
 python/paddle/v2/fluid/param_attr.py            | 22 +++++++++----------
 .../tests/book/test_recognize_digits_mlp.py     | 18 +++++++--------
 4 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index d8240dc155..f7917fc142 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -1,16 +1,16 @@
 # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
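[Editor's note: after the refinement above, all of GradientClipByGlobalNorm's state lives in class attributes, so the intended call pattern is a one-time init() followed by attaching instances to parameters. A hypothetical usage sketch under that assumption (the ParamAttr keyword is still named clip at this point; the patch below renames it to gradient_clip):

    import paddle.v2.fluid as fluid

    # one-time, per-program initialization of the shared class state
    fluid.clip.GradientClipByGlobalNorm.init(clip_norm=5.0)
    # each participating parameter gets its own instance of the attribute;
    # `image` is assumed to be a previously defined input variable
    w_attr = fluid.ParamAttr(clip=fluid.clip.GradientClipByGlobalNorm())
    fc = fluid.layers.fc(input=image, size=128, param_attr=w_attr)
]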
import functools import layers from framework import Variable @@ -162,7 +162,7 @@ def append_gradient_clip_ops(param_grad): context = dict() create_op_callbacks = [] for p, g in param_grad: - clip_attr = getattr(p, 'clip_attr', NullGradientClipAttr()) + clip_attr = getattr(p, 'gradient_clip_attr', NullGradientClipAttr()) if clip_attr is None: clip_attr = NullGradientClipAttr() if not isinstance(clip_attr, BaseGradientClipAttr): diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 8042febfed..9128a0eebe 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -946,7 +946,7 @@ class Parameter(Variable): self.regularizer = kwargs.get('regularizer', None) - self.clip_attr = kwargs.get('clip_attr', None) + self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None) # program is a global instance. diff --git a/python/paddle/v2/fluid/param_attr.py b/python/paddle/v2/fluid/param_attr.py index 3af0190590..8c8de0d104 100644 --- a/python/paddle/v2/fluid/param_attr.py +++ b/python/paddle/v2/fluid/param_attr.py @@ -1,16 +1,16 @@ # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from initializer import Initializer, Xavier, Constant from regularizer import WeightDecayRegularizer @@ -24,13 +24,13 @@ class ParamAttr(object): learning_rate=1.0, regularizer=None, trainable=True, - clip=None): + gradient_clip=None): self.name = name self.initializer = initializer self.learning_rate = learning_rate self.regularizer = regularizer self.trainable = trainable - self.clip = clip + self.gradient_clip = gradient_clip def set_default_initializer(self, initializer): if initializer is None: @@ -76,7 +76,7 @@ class ParamAttr(object): }, 'regularizer': self.regularizer, 'trainable': self.trainable, - 'clip_attr': self.clip + 'gradient_clip_attr': self.gradient_clip } if with_initializer: kwargs['initializer'] = self.initializer diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index 02da2fcc85..e614e5e3f1 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -1,16 +1,16 @@ # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import print_function import numpy as np import paddle.v2 as paddle @@ -26,7 +26,7 @@ hidden1 = fluid.layers.fc(input=image, act='relu', param_attr=fluid.ParamAttr( regularizer=regularizer, - clip=fluid.clip.ClipByValue(10))) + gradient_clip=fluid.clip.ClipByValue(10))) hidden2 = fluid.layers.fc(input=hidden1, size=64, From 6ebfade465be5526939b52b0d251486298c4c734 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 17 Jan 2018 19:14:05 +0800 Subject: [PATCH 05/22] fix copyright information --- paddle/gserver/tests/sequence_recurrent_group.py | 13 +++++++++++++ .../paddle/v2/fluid/tests/test_edit_distance_op.py | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/paddle/gserver/tests/sequence_recurrent_group.py b/paddle/gserver/tests/sequence_recurrent_group.py index a1d54542e3..1343f2956f 100644 --- a/paddle/gserver/tests/sequence_recurrent_group.py +++ b/paddle/gserver/tests/sequence_recurrent_group.py @@ -1,3 +1,16 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. #!/usr/bin/env python # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # diff --git a/python/paddle/v2/fluid/tests/test_edit_distance_op.py b/python/paddle/v2/fluid/tests/test_edit_distance_op.py index 38e87728b3..cf118df634 100644 --- a/python/paddle/v2/fluid/tests/test_edit_distance_op.py +++ b/python/paddle/v2/fluid/tests/test_edit_distance_op.py @@ -1,3 +1,16 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
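[Editor's note: with PATCH 04 applied, gradient clipping is configured through the renamed ParamAttr keyword, as the MLP test change above shows. A minimal sketch combining the renamed keyword with the per-parameter norm clip added in PATCH 01 (illustrative only; `regularizer` and `image` are assumed to be defined as in the test file):

    param_attr = fluid.ParamAttr(
        regularizer=regularizer,
        gradient_clip=fluid.clip.GradientClipByNorm(clip_norm=1.0))
    hidden = fluid.layers.fc(input=image, size=128, act='relu',
                             param_attr=param_attr)
]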
 import unittest
 import numpy as np
 from op_test import OpTest

From 1dac173b518faeb8f31c321a61fa287b8de4246e Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 17 Jan 2018 20:15:03 +0800
Subject: [PATCH 06/22] add API for clip_by_global_norm

---
 python/paddle/v2/fluid/clip.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index f7917fc142..d1e6987e01 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import functools
 import layers
-from framework import Variable
+import framework
 from . import core

 __all__ = [
@@ -128,8 +128,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):

     @classmethod
     def check_init(cls):
-        if not (isinstance(cls.global_norm_var, Variable) and
-                isinstance(cls.clip_norm_var, Variable)):
+        if not (isinstance(cls.global_norm_var, framework.Variable) and
+                isinstance(cls.clip_norm_var, framework.Variable)):
             raise ValueError(
                 "Class 'GradientClipByGlobalNorm' has not been properly initialized. \
                 Please call GradientClipByGlobalNorm.init() first.")
@@ -158,6 +158,23 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
         return param, new_grad


+def gradient_clip_by_global_norm(clip_norm, param_list=None, program=None):
+    if program is None:
+        program = framework.default_main_program()
+    if param_list is None:
+        param_list = program.block(0).all_parameters()
+    if all(isinstance(elem, basestring) for elem in param_list):
+        param_list = [program.block(0).var(elem) for elem in param_list]
+    if not all(isinstance(elem, framework.Parameter) for elem in param_list):
+        raise TypeError(
+            "'param_list' should be a list of Parameter or basestring(parameter's name)."
+        )
+
+    GradientClipByGlobalNorm.init(clip_norm)
+    for param in param_list:
+        param.gradient_clip_attr = GradientClipByGlobalNorm()
+
+
 def append_gradient_clip_ops(param_grad):
     context = dict()
     create_op_callbacks = []

From 958d07bee3343288f9813693b5a85150a5131cdd Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 17 Jan 2018 20:21:05 +0800
Subject: [PATCH 07/22] fix an error

---
 python/paddle/v2/fluid/framework.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py
index 9128a0eebe..91fdb5fa7e 100644
--- a/python/paddle/v2/fluid/framework.py
+++ b/python/paddle/v2/fluid/framework.py
@@ -777,7 +777,7 @@ class Block(object):
                 trainable=p.trainable,
                 optimize_attr=p.optimize_attr,
                 regularizer=p.regularizer,
-                clip_attr=p.clip_attr,
+                gradient_clip_attr=p.gradient_clip_attr,
                 error_clip=p.error_clip,
                 name=v.name)
             self.vars[new_p.name] = new_p

From a247972ddad05490a7b72911521bff0b48cf2d1c Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 17 Jan 2018 20:31:05 +0800
Subject: [PATCH 08/22] fix an error

---
 python/paddle/v2/fluid/clip.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index d1e6987e01..7a36df0dab 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -134,8 +134,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
                 "Class 'GradientClipByGlobalNorm' has not been properly initialized. \
                 Please call GradientClipByGlobalNorm.init() first.")

-    @classmethod
-    def process_context(cls, context, param, grad):
+    def process_context(self, context, param, grad):
+        cls = self.__class__
         cls.check_init()

         local_norm_var = layers.reduce_sum(
@@ -144,8 +144,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
             input=[local_norm_var, cls.global_norm_var],
             out=[cls.global_norm_var])

-    @classmethod
-    def create_operators(cls, param, grad):
+    def create_operators(self, param, grad):
+        cls = self.__class__
         cls.check_init()

         if cls.scale_var is None:

From 773f2f735c235afcc6ea40ddc2af23fe7a69a2e9 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Thu, 18 Jan 2018 21:06:51 +0800
Subject: [PATCH 09/22] fix errors

---
 python/paddle/v2/fluid/clip.py       | 5 +++--
 python/paddle/v2/fluid/layers/ops.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index 7a36df0dab..d4f025a4af 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -138,8 +138,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
         cls = self.__class__
         cls.check_init()

-        local_norm_var = layers.reduce_sum(
-            x=layers.pow(x=grad, factor=2), reduce_all=True)
+        local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0))
         layers.sums(
             input=[local_norm_var, cls.global_norm_var],
             out=[cls.global_norm_var])
@@ -154,6 +153,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
                 x=cls.clip_norm_var,
                 y=layers.elementwise_max(
                     x=cls.clip_norm_var, y=cls.global_norm_var))
+            assert cls.scale_var.shape == (1L, )
+
         new_grad = layers.elementwise_mul(x=grad, y=cls.scale_var)
         return param, new_grad

diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py
index dd3197fc00..a2055c5d7b 100644
--- a/python/paddle/v2/fluid/layers/ops.py
+++ b/python/paddle/v2/fluid/layers/ops.py
@@ -48,7 +48,7 @@ __all__ = [
     'mean', 'mul', 'reshape', 'scale', 'transpose',
     'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div',
     'elementwise_sub', 'elementwise_mul', 'elementwise_max', 'elementwise_min',
-    'clip', 'clip_by_norm', 'sequence_softmax', 'reduce_sum'
+    'clip', 'clip_by_norm', 'sequence_softmax'
 ] + __activations__

 for _OP in set(__all__):

From 9c0b29014cd38c36f6a599b7c6477000db30917d Mon Sep 17 00:00:00 2001
From: Yang Yu
Date: Fri, 19 Jan 2018 12:29:41 +0800
Subject: [PATCH 10/22] Make compare_op reuse elemwise_op_funcs

---
 paddle/operators/compare_op.cc               | 11 +++++-----
 paddle/operators/compare_op.cu               |  4 ----
 paddle/operators/compare_op.h                | 22 ++-----------------
 paddle/operators/elementwise_op_function.h   | 12 +++++-----
 .../paddle/v2/fluid/tests/test_compare_op.py |  2 --
 5 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/paddle/operators/compare_op.cc b/paddle/operators/compare_op.cc
index daa2c193b4..930c295a9c 100644
--- a/paddle/operators/compare_op.cc
+++ b/paddle/operators/compare_op.cc
@@ -39,6 +39,11 @@
 N-dim tensor. X and Y could be any type.  The each element of the Out tensor is
 calculated by %s
 )DOC",
                              comment.type, comment.equation));
+    AddAttr<int>("axis",
+                 "(int, default -1). \
The start dimension index "
+                 "for broadcasting Y onto X.")
+        .SetDefault(-1)
+        .EqualGreaterThan(-1);
   }
 };

@@ -95,11 +100,5 @@ REGISTER_LOGICAL_OP(less_than, "Out = X < Y");
 REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor);
 REGISTER_LOGICAL_OP(less_equal, "Out = X <= Y");
 REGISTER_LOGICAL_KERNEL(less_equal, CPU, paddle::operators::LessEqualFunctor);
-REGISTER_LOGICAL_OP(greater_than, "Out = X > Y");
-REGISTER_LOGICAL_KERNEL(greater_than, CPU,
-                        paddle::operators::GreaterThanFunctor);
-REGISTER_LOGICAL_OP(greater_equal, "Out = X >= Y");
-REGISTER_LOGICAL_KERNEL(greater_equal, CPU,
-                        paddle::operators::GreaterEqualFunctor);
 REGISTER_LOGICAL_OP(equal, "Out = X == Y");
 REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor);
diff --git a/paddle/operators/compare_op.cu b/paddle/operators/compare_op.cu
index 26049271be..f625824dbc 100644
--- a/paddle/operators/compare_op.cu
+++ b/paddle/operators/compare_op.cu
@@ -16,8 +16,4 @@ limitations under the License. */

 REGISTER_LOGICAL_KERNEL(less_than, CUDA, paddle::operators::LessThanFunctor);
 REGISTER_LOGICAL_KERNEL(less_equal, CUDA, paddle::operators::LessEqualFunctor);
-REGISTER_LOGICAL_KERNEL(greater_than, CUDA,
-                        paddle::operators::GreaterThanFunctor);
-REGISTER_LOGICAL_KERNEL(greater_equal, CUDA,
-                        paddle::operators::GreaterEqualFunctor);
 REGISTER_LOGICAL_KERNEL(equal, CUDA, paddle::operators::EqualFunctor);
diff --git a/paddle/operators/compare_op.h b/paddle/operators/compare_op.h
index 567e89c0a7..15e9cfcaab 100644
--- a/paddle/operators/compare_op.h
+++ b/paddle/operators/compare_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include
 #include
 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/elementwise_op_function.h"
 #include "paddle/platform/transform.h"

 namespace paddle {
@@ -33,18 +34,6 @@ struct LessEqualFunctor {
   HOSTDEVICE bool operator()(const T& a, const T& b) const { return a <= b; }
 };

-template <typename T>
-struct GreaterThanFunctor {
-  using ELEM_TYPE = T;
-  HOSTDEVICE bool operator()(const T& a, const T& b) const { return a > b; }
-};
-
-template <typename T>
-struct GreaterEqualFunctor {
-  using ELEM_TYPE = T;
-  HOSTDEVICE bool operator()(const T& a, const T& b) const { return a >= b; }
-};
-
 template <typename T>
 struct EqualFunctor {
   using ELEM_TYPE = T;
@@ -65,14 +54,7 @@ class CompareOpKernel
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     using T = typename Functor::ELEM_TYPE;
-    auto* x = context.Input<framework::Tensor>("X");
-    auto* y = context.Input<framework::Tensor>("Y");
-    auto* out = context.Output<framework::Tensor>("Out");
-    Functor binary_func;
-    platform::Transform<DeviceContext> trans;
-    trans(context.template device_context<DeviceContext>(), x->data<T>(),
-          x->data<T>() + x->numel(), y->data<T>(),
-          out->mutable_data<bool>(context.GetPlace()), binary_func);
+    ElementwiseComputeEx<Functor, DeviceContext, T, bool>(context);
   }
 };

diff --git a/paddle/operators/elementwise_op_function.h b/paddle/operators/elementwise_op_function.h
index db5d30c1af..e6f3e39ece 100644
--- a/paddle/operators/elementwise_op_function.h
+++ b/paddle/operators/elementwise_op_function.h
@@ -176,14 +176,15 @@ class MidWiseTransformIterator
 };
 #endif

-template <typename Functor, typename T, typename DeviceContext>
+template <typename Functor, typename T, typename DeviceContext,
+          typename OutType = T>
 class TransformFunctor {
  public:
   TransformFunctor(const framework::Tensor* x, const framework::Tensor* y,
                    framework::Tensor* z, const DeviceContext& ctx, Functor func)
       : x_(x->data<T>()),
         y_(y->data<T>()),
-        z_(z->mutable_data<T>(ctx.GetPlace())),
+        z_(z->mutable_data<OutType>(ctx.GetPlace())),
         nx_(x->numel()),
         ctx_(ctx),
         func_(func) {}
@@ -208,7 +209,7 @@ class TransformFunctor {
  private:
   const T* x_;
   const T* y_;
-  T* z_;
+  OutType* z_;
   int64_t nx_;
   const DeviceContext& ctx_;
   Functor func_;

@@ -364,7 +365,8 @@ void ElementwiseGradCompute(const framework::ExecutionContext& ctx) {
   }
 }

-template <typename Functor, typename DeviceContext, typename T>
+template <typename Functor, typename DeviceContext, typename T,
+          typename OutType = T>
 void ElementwiseComputeEx(const framework::ExecutionContext& ctx) {
   using Tensor = framework::Tensor;

@@ -372,7 +374,7 @@ void ElementwiseComputeEx(const framework::ExecutionContext& ctx) {
   auto* y = ctx.Input<Tensor>("Y");
   auto* z = ctx.Output<Tensor>("Out");
   z->mutable_data<T>(ctx.GetPlace());
-  TransformFunctor<Functor, T, DeviceContext> functor(
+  TransformFunctor<Functor, T, DeviceContext, OutType> functor(
       x, y, z, ctx.template device_context<DeviceContext>(), Functor());

   auto x_dims = x->dims();
diff --git a/python/paddle/v2/fluid/tests/test_compare_op.py b/python/paddle/v2/fluid/tests/test_compare_op.py
index fbf8921e40..00e781c616 100644
--- a/python/paddle/v2/fluid/tests/test_compare_op.py
+++ b/python/paddle/v2/fluid/tests/test_compare_op.py
@@ -37,8 +37,6 @@ def create_test_class(op_type, typename, callback):
 for _type_name in {'float32', 'float64', 'int32', 'int64'}:
     create_test_class('less_than', _type_name, lambda _a, _b: _a < _b)
     create_test_class('less_equal', _type_name, lambda _a, _b: _a <= _b)
-    create_test_class('greater_than', _type_name, lambda _a, _b: _a > _b)
-    create_test_class('greater_equal', _type_name, lambda _a, _b: _a >= _b)
     create_test_class('equal', _type_name, lambda _a, _b: _a == _b)

 if __name__ == '__main__':

From 42b0748ab4f797902cadad4b5278a4cb9fdea9bd Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Fri, 19 Jan 2018 15:12:39 +0800
Subject: [PATCH 11/22] add unittest

---
 python/paddle/v2/fluid/clip.py                 | 14 +++-
 .../{test_clip.py => test_error_clip.py}       |  0
 .../v2/fluid/tests/test_gradient_clip.py       | 82 +++++++++++++++++++
 3 files changed, 93 insertions(+), 3 deletions(-)
 rename python/paddle/v2/fluid/tests/{test_clip.py => test_error_clip.py} (100%)
 create mode 100644 python/paddle/v2/fluid/tests/test_gradient_clip.py

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index d4f025a4af..f6ff83924f 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -113,6 +113,7 @@ class GradientClipByNorm(BaseGradientClipAttr):

 class GradientClipByGlobalNorm(BaseGradientClipAttr):
     global_norm_var = None
+    local_norm_var = None
     clip_norm_var = None
     scale_var = None

@@ -123,12 +124,18 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):

         cls.global_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=0.0)
+        cls.local_norm_var = framework.default_main_program().current_block(
+        ).create_var(
+            name=framework.unique_name("local_norm"),
+            dtype="float32",
+            persistable=False)
         cls.clip_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=clip_norm)

     @classmethod
     def check_init(cls):
         if not (isinstance(cls.global_norm_var, framework.Variable) and
+                isinstance(cls.local_norm_var, framework.Variable) and
                 isinstance(cls.clip_norm_var, framework.Variable)):
             raise ValueError(
                 "Class 'GradientClipByGlobalNorm' has not been properly initialized. 
\ @@ -138,9 +145,10 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): cls = self.__class__ cls.check_init() - local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0)) + cls.local_norm_var = layers.reduce_sum( + input=layers.pow(x=grad, factor=2.0)) layers.sums( - input=[local_norm_var, cls.global_norm_var], + input=[cls.local_norm_var, cls.global_norm_var], out=[cls.global_norm_var]) def create_operators(self, param, grad): @@ -148,7 +156,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): cls.check_init() if cls.scale_var is None: - cls.global_norm_var = layers.sqrt(x=cls.global_norm_var) + layers.sqrt(x=cls.global_norm_var, out=cls.global_norm_var) cls.scale_var = layers.elementwise_div( x=cls.clip_norm_var, y=layers.elementwise_max( diff --git a/python/paddle/v2/fluid/tests/test_clip.py b/python/paddle/v2/fluid/tests/test_error_clip.py similarity index 100% rename from python/paddle/v2/fluid/tests/test_clip.py rename to python/paddle/v2/fluid/tests/test_error_clip.py diff --git a/python/paddle/v2/fluid/tests/test_gradient_clip.py b/python/paddle/v2/fluid/tests/test_gradient_clip.py new file mode 100644 index 0000000000..4fb7f0b2cb --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_gradient_clip.py @@ -0,0 +1,82 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import numpy as np
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+
+
+def _get_global_param_norm_(params_grads):
+    res = fluid.layers.fill_constant(shape=[1], dtype="float32", value=0.0)
+    for _, grad in params_grads:
+        norm_var = fluid.layers.reduce_sum(
+            input=fluid.layers.pow(x=grad, factor=2.0))
+        fluid.layers.sums(input=[norm_var, res], out=[res])
+    fluid.layers.sqrt(x=res, out=res)
+    return res
+
+
+BATCH_SIZE = 128
+CLIP = 0.5
+prog = fluid.framework.Program()
+
+with fluid.program_guard(main_program=prog):
+    image = fluid.layers.data(name='x', shape=[784], dtype='float32')
+
+    hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
+    hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
+    predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
+
+    label = fluid.layers.data(name='y', shape=[1], dtype='int64')
+
+    cost = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+prog_clip = prog.clone()
+
+avg_cost_clip = prog_clip.block(0).var(avg_cost.name)
+
+p_g = fluid.backward.append_backward(loss=avg_cost)
+p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)
+
+with fluid.program_guard(main_program=prog):
+    global_norm = _get_global_param_norm_(p_g)
+
+with fluid.program_guard(main_program=prog_clip):
+    fluid.clip.gradient_clip_by_global_norm(clip_norm=CLIP)
+    p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
+    global_norm_clip = _get_global_param_norm_(p_g_clip)
+
+train_reader = paddle.batch(
+    paddle.reader.shuffle(
+        paddle.dataset.mnist.train(), buf_size=8192),
+    batch_size=BATCH_SIZE)
+
+place = fluid.CPUPlace()
+exe = fluid.Executor(place)
+feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
+exe.run(fluid.default_startup_program())
+
+count = 0
+for data in train_reader():
+    count += 1
+    if count > 5:
+        break
+    out, = exe.run(prog, feed=feeder.feed(data), fetch_list=[global_norm])
+    out_clip, = exe.run(prog_clip,
+                        feed=feeder.feed(data),
+                        fetch_list=[global_norm_clip])
+
+    if not np.allclose(out_clip, np.minimum(out, np.array([CLIP]))):
+        exit(1)
+exit(0)

From 408a6b8bb2af4f8f075680bb361daad329ad6eca Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Fri, 19 Jan 2018 15:17:35 +0800
Subject: [PATCH 12/22] tiny fix

---
 python/paddle/v2/fluid/clip.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index f6ff83924f..9800ad7c5d 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -124,11 +124,11 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):

         cls.global_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=0.0)
-        cls.local_norm_var = framework.default_main_program().current_block(
-        ).create_var(
-            name=framework.unique_name("local_norm"),
-            dtype="float32",
-            persistable=False)
+        cls.local_norm_var = framework.default_main_program().block(
+            0).create_var(
+                name=framework.unique_name("local_norm"),
+                dtype="float32",
+                persistable=False)
         cls.clip_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=clip_norm)

From 538f1ad28f766c0e47ef4eef2ec59e187ba30f8e Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Fri, 19 Jan 2018 15:24:14 +0800
Subject: [PATCH 13/22] tiny fix

---
 python/paddle/v2/fluid/clip.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index 9800ad7c5d..d97cd9ecc9 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -124,11 +124,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):

         cls.global_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=0.0)
-        cls.local_norm_var = framework.default_main_program().block(
-            0).create_var(
-                name=framework.unique_name("local_norm"),
-                dtype="float32",
-                persistable=False)
+        cls.local_norm_var = layers.create_tensor(dtype="float32")
         cls.clip_norm_var = layers.fill_constant(
             shape=[1], dtype="float32", value=clip_norm)

From 22662ae4245a26d51b1c0857939934041e5bee56 Mon Sep 17 00:00:00 2001
From: Yang Yu
Date: Fri, 19 Jan 2018 16:52:42 +0800
Subject: [PATCH 14/22] Move paddle.v2.fluid.registry to layers

* registry is only used by layers
* Rename it to layer_function_generator
---
 python/paddle/v2/fluid/layers/control_flow.py | 13 ++++----
 python/paddle/v2/fluid/layers/device.py       |  6 ++--
 .../layer_function_generator.py}              | 29 ++++++++++-------
 python/paddle/v2/fluid/layers/ops.py          |  4 +--
 python/paddle/v2/fluid/tests/test_registry.py | 32 ++++++++-----------
 5 files changed, 43 insertions(+), 41 deletions(-)
 rename python/paddle/v2/fluid/{registry.py => layers/layer_function_generator.py} (93%)

diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py
index e72b22c83f..f333c3c26e 100644
--- a/python/paddle/v2/fluid/layers/control_flow.py
+++ b/python/paddle/v2/fluid/layers/control_flow.py
@@ -11,12 +11,13 @@
 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #See the License for the specific language governing permissions and
 #limitations under the License.
-from ..layer_helper import LayerHelper, unique_name
-from ..framework import Program, Variable, Operator
-from .. import core
-from tensor import assign, fill_constant
 import contextlib
-from ..registry import autodoc
+
+from layer_function_generator import autodoc
+from tensor import assign, fill_constant
+from .. import core
+from ..framework import Program, Variable, Operator
+from ..layer_helper import LayerHelper, unique_name

 __all__ = [
     'split_lod_tensor', 'merge_lod_tensor', 'BlockGuard',
@@ -1457,7 +1458,7 @@ class DynamicRNN(object):
                     method))


-@autodoc
+@autodoc()
 def reorder_lod_tensor_by_rank(x, rank_table):
     helper = LayerHelper('reorder_lod_tensor_by_rank', **locals())
     helper.is_instance('x', Variable)
diff --git a/python/paddle/v2/fluid/layers/device.py b/python/paddle/v2/fluid/layers/device.py
index ef74b2b2f0..b586a213ca 100644
--- a/python/paddle/v2/fluid/layers/device.py
+++ b/python/paddle/v2/fluid/layers/device.py
@@ -15,14 +15,14 @@
 All util layers.
 """

-from ..layer_helper import LayerHelper
+from layer_function_generator import autodoc
 from ..framework import unique_name
-from ..registry import autodoc
+from ..layer_helper import LayerHelper

 __all__ = ['get_places']


-@autodoc
+@autodoc()
 def get_places(device_count=None, device_type=None):
     helper = LayerHelper('get_places', **locals())
     out_places = helper.create_variable(name=unique_name(helper.name + ".out"))
diff --git a/python/paddle/v2/fluid/registry.py b/python/paddle/v2/fluid/layers/layer_function_generator.py
similarity index 93%
rename from python/paddle/v2/fluid/registry.py
rename to python/paddle/v2/fluid/layers/layer_function_generator.py
index 6c0c3a3518..0c4cda86a3 100644
--- a/python/paddle/v2/fluid/registry.py
+++ b/python/paddle/v2/fluid/layers/layer_function_generator.py
@@ -11,19 +11,21 @@
 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
#See the License for the specific language governing permissions and #limitations under the License. -import re import cStringIO -import warnings import functools -import inspect +import re +import warnings -import proto.framework_pb2 as framework_pb2 -from framework import OpProtoHolder, Variable, Program, Operator -from paddle.v2.fluid.layer_helper import LayerHelper, unique_name +from .. import proto + +framework_pb2 = proto.framework_pb2 + +from ..framework import OpProtoHolder, Variable +from ..layer_helper import LayerHelper __all__ = [ 'deprecated', - 'register_layer', + 'generate_layer_fn', 'autodoc', ] @@ -96,7 +98,7 @@ def _generate_doc_string_(op_proto): return buf.getvalue() -def register_layer(op_type): +def generate_layer_fn(op_type): """Register the Python layer for an Operator. Args: @@ -202,7 +204,10 @@ def deprecated(func_or_class): return func_wrapper -def autodoc(func): - func.__doc__ = _generate_doc_string_(OpProtoHolder.instance().get_op_proto( - func.__name__)) - return func +def autodoc(comment=""): + def __impl__(func): + func.__doc__ = _generate_doc_string_(OpProtoHolder.instance( + ).get_op_proto(func.__name__)) + comment + return func + + return __impl__ diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index 21945edf08..24f6b7294e 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -11,7 +11,7 @@ #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #See the License for the specific language governing permissions and #limitations under the License. -from ..registry import register_layer +from layer_function_generator import generate_layer_fn __activations__ = [ 'sigmoid', @@ -62,4 +62,4 @@ __all__ = [ ] + __activations__ for _OP in set(__all__): - globals()[_OP] = register_layer(_OP) + globals()[_OP] = generate_layer_fn(_OP) diff --git a/python/paddle/v2/fluid/tests/test_registry.py b/python/paddle/v2/fluid/tests/test_registry.py index dba1189630..b1749a76f0 100644 --- a/python/paddle/v2/fluid/tests/test_registry.py +++ b/python/paddle/v2/fluid/tests/test_registry.py @@ -1,35 +1,31 @@ # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import unittest -import warnings import paddle.v2.fluid as fluid -import paddle.v2.fluid.framework as framework -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.registry as registry +import numpy as np +import decorators class TestRegistry(unittest.TestCase): + @decorators.prog_scope() def test_registry_layer(self): - self.layer_type = "mean" - program = framework.Program() - x = fluid.layers.data(name='X', shape=[10, 10], dtype='float32') - output = layers.mean(x) + output = fluid.layers.mean(x=x) + place = fluid.CPUPlace() exe = fluid.Executor(place) - X = np.random.random((10, 10)).astype("float32") - mean_out = exe.run(program, feed={"X": X}, fetch_list=[output]) + mean_out = exe.run(feed={"X": X}, fetch_list=[output]) self.assertAlmostEqual(np.mean(X), mean_out) From 9a97c7f745b0014504f9c52897b72fd58147e70a Mon Sep 17 00:00:00 2001 From: ying Date: Wed, 17 Jan 2018 20:09:44 +0800 Subject: [PATCH 15/22] add wmt16 into dataset. --- python/paddle/v2/dataset/__init__.py | 16 +- python/paddle/v2/dataset/common.py | 21 +- python/paddle/v2/dataset/tests/wmt16_test.py | 66 ++++ python/paddle/v2/dataset/wmt14.py | 18 +- python/paddle/v2/dataset/wmt16.py | 348 ++++++++++++++++++ python/paddle/v2/fluid/layers/control_flow.py | 33 +- 6 files changed, 482 insertions(+), 20 deletions(-) create mode 100644 python/paddle/v2/dataset/tests/wmt16_test.py create mode 100644 python/paddle/v2/dataset/wmt16.py diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index 90830515c1..c1acbecd9c 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -24,11 +24,23 @@ import conll05 import uci_housing import sentiment import wmt14 +import wmt16 import mq2007 import flowers import voc2012 __all__ = [ - 'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment' - 'uci_housing', 'wmt14', 'mq2007', 'flowers', 'voc2012' + 'mnist', + 'imikolov', + 'imdb', + 'cifar', + 'movielens', + 'conll05', + 'sentiment' + 'uci_housing', + 'wmt14', + 'wmt16', + 'mq2007', + 'flowers', + 'voc2012', ] diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index fab8a68b0b..9aba35a648 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -25,8 +25,12 @@ import glob import cPickle as pickle __all__ = [ - 'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader', - 'convert' + 'DATA_HOME', + 'download', + 'md5file', + 'split', + 'cluster_files_reader', + 'convert', ] DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') @@ -58,12 +62,15 @@ def md5file(fname): return hash_md5.hexdigest() -def download(url, module_name, md5sum): +def download(url, module_name, md5sum, save_name=None): dirname = os.path.join(DATA_HOME, module_name) if not os.path.exists(dirname): os.makedirs(dirname) - filename = os.path.join(dirname, url.split('/')[-1]) + filename = os.path.join(dirname, + url.split('/')[-1] + if save_name is None else save_name) + retry = 0 retry_limit = 3 while not (os.path.exists(filename) and md5file(filename) == md5sum): @@ -196,9 +203,11 @@ def convert(output_path, reader, line_count, name_prefix): Convert data from reader to recordio format files. :param output_path: directory in which output files will be saved. - :param reader: a data reader, from which the convert program will read data instances. + :param reader: a data reader, from which the convert program will read + data instances. :param name_prefix: the name prefix of generated files. 
-    :param max_lines_to_shuffle: the max lines numbers to shuffle before writing.
+    :param max_lines_to_shuffle: the max number of lines to shuffle before
+                                 writing.
     """
     assert line_count >= 1
diff --git a/python/paddle/v2/dataset/tests/wmt16_test.py b/python/paddle/v2/dataset/tests/wmt16_test.py
new file mode 100644
index 0000000000..cef6c3216e
--- /dev/null
+++ b/python/paddle/v2/dataset/tests/wmt16_test.py
@@ -0,0 +1,66 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.v2.dataset.wmt16
import unittest


class TestWMT16(unittest.TestCase):
    def checkout_one_sample(self, sample):
        # train data has 3 fields: source language word indices,
        # target language word indices, and target next word indices.
        self.assertEqual(len(sample), 3)

        # test start mark and end mark in source word indices.
        self.assertEqual(sample[0][0], 0)
        self.assertEqual(sample[0][-1], 1)

        # test start mark in target word indices
        self.assertEqual(sample[1][0], 0)

        # test end mark in target next word indices
        self.assertEqual(sample[2][-1], 1)

    def test_train(self):
        for idx, sample in enumerate(
                paddle.v2.dataset.wmt16.train(
                    src_dict_size=100000, trg_dict_size=100000)()):
            if idx >= 10: break
            self.checkout_one_sample(sample)

    def test_test(self):
        for idx, sample in enumerate(
                paddle.v2.dataset.wmt16.test(
                    src_dict_size=1000, trg_dict_size=1000)()):
            if idx >= 10: break
            self.checkout_one_sample(sample)

    def test_val(self):
        for idx, sample in enumerate(
                paddle.v2.dataset.wmt16.validation(
                    src_dict_size=1000, trg_dict_size=1000)()):
            if idx >= 10: break
            self.checkout_one_sample(sample)

    def test_get_dict(self):
        dict_size = 1000
        word_dict = paddle.v2.dataset.wmt16.get_dict("en", dict_size, True)
        self.assertEqual(len(word_dict), dict_size)
        self.assertEqual(word_dict[0], "<s>")
        self.assertEqual(word_dict[1], "<e>")
        self.assertEqual(word_dict[2], "<unk>")


if __name__ == "__main__":
    unittest.main()
diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py
index 95a35d97ce..1e54a4999b 100644
--- a/python/paddle/v2/dataset/wmt14.py
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -25,12 +25,20 @@ import gzip
 import paddle.v2.dataset.common
 from paddle.v2.parameters import Parameters

-__all__ = ['train', 'test', 'build_dict', 'convert']
-
-URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
+__all__ = [
+    'train',
+    'test',
+    'get_dict',
+    'convert',
+]
+
+URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/'
+                'cslm_joint_paper/data/dev+test.tgz')
 MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
-# this is a small set of data for test. The original data is too large and will be add later.
-URL_TRAIN = 'http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz'
+# this is a small set of data for test. The original data is too large and
+# will be added later.
+URL_TRAIN = ('http://paddlepaddle.cdn.bcebos.com/demo/'
+             'wmt_shrinked_data/wmt14.tgz')
 MD5_TRAIN = '0791583d57d5beb693b9414c5b36798c'
 # BLEU of this trained model is 26.92
 URL_MODEL = 'http://paddlepaddle.bj.bcebos.com/demo/wmt_14/wmt14_model.tar.gz'
diff --git a/python/paddle/v2/dataset/wmt16.py b/python/paddle/v2/dataset/wmt16.py
new file mode 100644
index 0000000000..a1899f20b5
--- /dev/null
+++ b/python/paddle/v2/dataset/wmt16.py
@@ -0,0 +1,348 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
ACL2016 Multimodal Machine Translation. Please see this website for more
details: http://www.statmt.org/wmt16/multimodal-task.html#task1

If you use the dataset created for your task, please cite the following paper:
Multi30K: Multilingual English-German Image Descriptions.

@article{elliott-EtAl:2016:VL16,
 author    = {{Elliott}, D. and {Frank}, S. and {Sima'an}, K. and {Specia}, L.},
 title     = {Multi30K: Multilingual English-German Image Descriptions},
 booktitle = {Proceedings of the 6th Workshop on Vision and Language},
 year      = {2016},
 pages     = {70--74}
}
"""

import os
import tarfile
import gzip
from collections import defaultdict

import paddle.v2.dataset.common

__all__ = [
    "train",
    "test",
    "validation",
    "convert",
    "fetch",
    "get_dict",
]

DATA_URL = ("http://cloud.dlnel.org/filepub/"
            "?uuid=46a0808e-ddd8-427c-bacd-0dbc6d045fed")
DATA_MD5 = "0c38be43600334966403524a40dcd81e"

TOTAL_EN_WORDS = 11250
TOTAL_DE_WORDS = 19220

START_MARK = "<s>"
END_MARK = "<e>"
UNK_MARK = "<unk>"


def __build_dict__(tar_file, dict_size, save_path, lang):
    word_dict = defaultdict(int)
    with tarfile.open(tar_file, mode="r") as f:
        for line in f.extractfile("wmt16/train"):
            line_split = line.strip().split("\t")
            if len(line_split) != 2: continue
            sen = line_split[0] if lang == "en" else line_split[1]
            for w in sen.split():
                word_dict[w] += 1

    with open(save_path, "w") as fout:
        fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))
        for idx, word in enumerate(
                sorted(
                    word_dict.iteritems(), key=lambda x: x[1], reverse=True)):
            if idx + 3 == dict_size: break
            fout.write("%s\n" % (word[0]))


def __load_dict__(tar_file, dict_size, lang, reverse=False):
    dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME,
                             "wmt16/%s_%d.dict" % (lang, dict_size))
    if not os.path.exists(dict_path) or (
            len(open(dict_path, "r").readlines()) != dict_size):
        __build_dict__(tar_file, dict_size, dict_path, lang)

    word_dict = {}
    with open(dict_path, "r") as fdict:
        for idx, line in enumerate(fdict):
            if reverse:
                word_dict[idx] = line.strip()
            else:
                word_dict[line.strip()] = idx
    return word_dict


def __get_dict_size__(src_dict_size, trg_dict_size, src_lang):
    src_dict_size = min(src_dict_size, (TOTAL_EN_WORDS if src_lang == "en" else
                                        TOTAL_DE_WORDS))
    trg_dict_size = min(trg_dict_size, (TOTAL_DE_WORDS if src_lang == "en" else
                                        TOTAL_EN_WORDS))
    return src_dict_size, trg_dict_size


def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size,
                   src_lang):
    def reader():
        src_dict = __load_dict__(tar_file, src_dict_size, src_lang)
        trg_dict = __load_dict__(tar_file, trg_dict_size,
                                 ("de" if src_lang == "en" else "en"))

        # the indices for start mark, end mark, and unk are the same in source
        # language and target language. Here uses the source language
        # dictionary to determine their indices.
        start_id = src_dict[START_MARK]
        end_id = src_dict[END_MARK]
        unk_id = src_dict[UNK_MARK]

        src_col = 0 if src_lang == "en" else 1
        trg_col = 1 - src_col

        with tarfile.open(tar_file, mode="r") as f:
            for line in f.extractfile(file_name):
                line_split = line.strip().split("\t")
                if len(line_split) != 2:
                    continue
                src_words = line_split[src_col].split()
                src_ids = [start_id] + [
                    src_dict.get(w, unk_id) for w in src_words
                ] + [end_id]

                trg_words = line_split[trg_col].split()
                trg_ids = [trg_dict.get(w, unk_id) for w in trg_words]

                trg_ids_next = trg_ids + [end_id]
                trg_ids = [start_id] + trg_ids

                yield src_ids, trg_ids, trg_ids_next

    return reader


def train(src_dict_size, trg_dict_size, src_lang="en"):
    """
    WMT16 train set reader.

    This function returns the reader for train data. Each sample the reader
    returns is made up of three fields: the source language word index
    sequence, target language word index sequence and next word index
    sequence.

    NOTE:
        The original link for training data is:
        http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/training.tar.gz

        paddle.dataset.wmt16 provides a tokenized version of the original
        dataset by using moses's tokenization script:
        https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl

    Args:
        src_dict_size(int): Size of the source language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        trg_dict_size(int): Size of the target language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        src_lang(string): A string indicating which language is the source
                          language. Available options are: "en" for English
                          and "de" for German.

    Returns:
        callable: The train reader.
    """

    assert src_lang in ["en", "de"], ("An invalid language type. "
                                      "Only support: en (for English); "
                                      "de (for German)")
    src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size,
                                                     trg_dict_size, src_lang)

    return reader_creator(
        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
                                                   "wmt16.tar.gz"),
        file_name="wmt16/train",
        src_dict_size=src_dict_size,
        trg_dict_size=trg_dict_size,
        src_lang=src_lang)


def test(src_dict_size, trg_dict_size, src_lang="en"):
    """
    WMT16 test set reader.

    This function returns the reader for test data. Each sample the reader
    returns is made up of three fields: the source language word index
    sequence, target language word index sequence and next word index
    sequence.

    NOTE:
        The original link for test data is:
        http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/mmt16_task1_test.tar.gz

        paddle.dataset.wmt16 provides a tokenized version of the original
        dataset by using moses's tokenization script:
        https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl

    Args:
        src_dict_size(int): Size of the source language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        trg_dict_size(int): Size of the target language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        src_lang(string): A string indicating which language is the source
                          language. Available options are: "en" for English
                          and "de" for German.

    Returns:
        callable: The test reader.
    """

    assert src_lang in ["en", "de"], ("An invalid language type. "
                                      "Only support: en (for English); "
                                      "de (for German)")

    src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size,
                                                     trg_dict_size, src_lang)

    return reader_creator(
        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
                                                   "wmt16.tar.gz"),
        file_name="wmt16/test",
        src_dict_size=src_dict_size,
        trg_dict_size=trg_dict_size,
        src_lang=src_lang)


def validation(src_dict_size, trg_dict_size, src_lang="en"):
    """
    WMT16 validation set reader.

    This function returns the reader for validation data. Each sample the
    reader returns is made up of three fields: the source language word index
    sequence, target language word index sequence and next word index
    sequence.

    NOTE:
        The original link for validation data is:
        http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz

        paddle.dataset.wmt16 provides a tokenized version of the original
        dataset by using moses's tokenization script:
        https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl

    Args:
        src_dict_size(int): Size of the source language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        trg_dict_size(int): Size of the target language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        src_lang(string): A string indicating which language is the source
                          language. Available options are: "en" for English
                          and "de" for German.

    Returns:
        callable: The validation reader.
    """
    assert src_lang in ["en", "de"], ("An invalid language type. "
                                      "Only support: en (for English); "
                                      "de (for German)")
    src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size,
                                                     trg_dict_size, src_lang)

    return reader_creator(
        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
                                                   "wmt16.tar.gz"),
        file_name="wmt16/val",
        src_dict_size=src_dict_size,
        trg_dict_size=trg_dict_size,
        src_lang=src_lang)


def get_dict(lang, dict_size, reverse=False):
    """
    return the word dictionary for the specified language.

    Args:
        lang(string): A string indicating which language is the source
                      language. Available options are: "en" for English
                      and "de" for German.
        dict_size(int): Size of the specified language dictionary.
        reverse(bool): If reverse is set to False, the returned python
                       dictionary will use word as key and use index as value.
                       If reverse is set to True, the returned python
                       dictionary will use index as key and word as value.

    Returns:
        dict: The word dictionary for the specific language.
    """

    if lang == "en": dict_size = min(dict_size, TOTAL_EN_WORDS)
    else: dict_size = min(dict_size, TOTAL_DE_WORDS)

    dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME,
                             "wmt16/%s_%d.dict" % (lang, dict_size))
    assert os.path.exists(dict_path), ("Word dictionary does not exist. "
" + "Please invoke paddle.dataset.wmt16.train/test/validation " + "first to build the dictionary.") + tar_file = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16.tar.gz") + return __load_dict__(tar_file, dict_size, lang, reverse) + + +def fetch(): + """download the entire dataset. + """ + paddle.v4.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, + "wmt16.tar.gz") + + +def convert(path, src_dict_size, trg_dict_size, src_lang): + """Converts dataset to recordio format. + """ + + paddle.v2.dataset.common.convert( + path, + train( + src_dict_size=src_dict_size, + trg_dict_size=trg_dict_size, + src_lang=src_lang), + 1000, + "wmt16_train") + paddle.v2.dataset.common.convert( + path, + test( + src_dict_size=src_dict_size, + trg_dict_size=trg_dict_size, + src_lang=src_lang), + 1000, + "wmt16_test") + paddle.v2.dataset.common.convert( + path, + validation( + src_dict_size=src_dict_size, + trg_dict_size=trg_dict_size, + src_lang=src_lang), + 1000, + "wmt16_validation") diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py index e72b22c83f..b2183ebda1 100644 --- a/python/paddle/v2/fluid/layers/control_flow.py +++ b/python/paddle/v2/fluid/layers/control_flow.py @@ -19,13 +19,32 @@ import contextlib from ..registry import autodoc __all__ = [ - 'split_lod_tensor', 'merge_lod_tensor', 'BlockGuard', - 'BlockGuardWithCompletion', 'StaticRNNMemoryLink', 'WhileGuard', 'While', - 'lod_rank_table', 'max_sequence_len', 'topk', 'lod_tensor_to_array', - 'array_to_lod_tensor', 'increment', 'array_write', 'create_array', - 'less_than', 'array_read', 'shrink_memory', 'array_length', 'IfElse', - 'DynamicRNN', 'ConditionalBlock', 'StaticRNN', 'reorder_lod_tensor_by_rank', - 'ParallelDo', 'Print' + 'split_lod_tensor', + 'merge_lod_tensor', + 'BlockGuard', + 'BlockGuardWithCompletion', + 'StaticRNNMemoryLink', + 'WhileGuard', + 'While', + 'lod_rank_table', + 'max_sequence_len', + 'topk', + 'lod_tensor_to_array', + 'array_to_lod_tensor', + 'increment', + 'array_write', + 'create_array', + 'less_than', + 'array_read', + 'shrink_memory', + 'array_length', + 'IfElse', + 'DynamicRNN', + 'ConditionalBlock', + 'StaticRNN', + 'reorder_lod_tensor_by_rank', + 'ParallelDo', + 'Print', ] From 19c554f9e4ef5c96e47f65efd44e2524417e38d7 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 19 Jan 2018 19:19:35 +0800 Subject: [PATCH 16/22] update --- python/paddle/v2/fluid/clip.py | 82 +++++++++---------- .../v2/fluid/tests/test_gradient_clip.py | 44 +++++----- 2 files changed, 59 insertions(+), 67 deletions(-) diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py index d97cd9ecc9..fb0907c9f4 100644 --- a/python/paddle/v2/fluid/clip.py +++ b/python/paddle/v2/fluid/clip.py @@ -112,58 +112,52 @@ class GradientClipByNorm(BaseGradientClipAttr): class GradientClipByGlobalNorm(BaseGradientClipAttr): - global_norm_var = None - local_norm_var = None - clip_norm_var = None - scale_var = None - - @classmethod - def init(cls, clip_norm): - if not (isinstance(clip_norm, int) or isinstance(clip_norm, float)): - raise TypeError("The 'clip_norm' must be a value of int or float") - - cls.global_norm_var = layers.fill_constant( - shape=[1], dtype="float32", value=0.0) - cls.local_norm_var = layers.create_tensor(dtype="float32") - cls.clip_norm_var = layers.fill_constant( - shape=[1], dtype="float32", value=clip_norm) - - @classmethod - def check_init(cls): - if not (isinstance(cls.global_norm_var, framework.Variable) and - isinstance(cls.local_norm_var, 
framework.Variable) and - isinstance(cls.clip_norm_var, framework.Variable)): - raise ValueError( - "Class 'GradientClipByGlobalNorm' has not been properly initialized. \ - Please call GradientClipByGlobalNorm.init() first.") + def __init__(self, clip_norm, group_name="default_group"): + if not isinstance(group_name, basestring): + raise TypeError("'group_name' must be a basestring.") + + self.clip_norm = clip_norm + self.group_name = group_name def process_context(self, context, param, grad): - cls = self.__class__ - cls.check_init() + if self.group_name not in context: + context[self.group_name] = [] + context[self.group_name + "_clip_value"] = self.clip_norm + context[self.group_name + "_clip"] = layers.fill_constant( + shape=[1], dtype="float32", value=self.clip_norm) + else: + if not self.clip_norm == context[self.group_name + "_clip_value"]: + raise ValueError( + "All parameters' 'clip_norm' of a same group should be the same" + ) - cls.local_norm_var = layers.reduce_sum( - input=layers.pow(x=grad, factor=2.0)) - layers.sums( - input=[cls.local_norm_var, cls.global_norm_var], - out=[cls.global_norm_var]) + local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0)) + context[self.group_name].append(local_norm_var) - def create_operators(self, param, grad): - cls = self.__class__ - cls.check_init() + self.context = context - if cls.scale_var is None: - layers.sqrt(x=cls.global_norm_var, out=cls.global_norm_var) - cls.scale_var = layers.elementwise_div( - x=cls.clip_norm_var, + def create_operators(self, param, grad): + group_scale_name = self.group_name + "_scale" + if group_scale_name not in self.context: + group_norm_var = layers.sums(input=self.context[self.group_name]) + layers.sqrt(x=group_norm_var, out=group_norm_var) + clip_var = self.context[self.group_name + "_clip"] + group_scale_var = layers.elementwise_div( + x=clip_var, y=layers.elementwise_max( - x=cls.clip_norm_var, y=cls.global_norm_var)) - assert cls.scale_var.shape == (1L, ) + x=clip_var, y=group_norm_var)) + assert group_scale_var.shape == (1L, ) + self.context[group_scale_name] = group_scale_var - new_grad = layers.elementwise_mul(x=grad, y=cls.scale_var) + new_grad = layers.elementwise_mul( + x=grad, y=self.context[group_scale_name]) return param, new_grad -def gradient_clip_by_global_norm(clip_norm, param_list=None, program=None): +def gradient_clip_by_global_norm(clip_norm, + param_list=None, + group_name="default_group", + program=None): if program is None: program = framework.default_main_program() if param_list is None: @@ -175,9 +169,9 @@ def gradient_clip_by_global_norm(clip_norm, param_list=None, program=None): "'param_list' should be a list of Parameter or basestring(parameter's name)." 
) - GradientClipByGlobalNorm.init(clip_norm) for param in param_list: - param.gradient_clip_attr = GradientClipByGlobalNorm() + param.gradient_clip_attr = GradientClipByGlobalNorm(clip_norm, + group_name) def append_gradient_clip_ops(param_grad): diff --git a/python/paddle/v2/fluid/tests/test_gradient_clip.py b/python/paddle/v2/fluid/tests/test_gradient_clip.py index 4fb7f0b2cb..75c5fd9892 100644 --- a/python/paddle/v2/fluid/tests/test_gradient_clip.py +++ b/python/paddle/v2/fluid/tests/test_gradient_clip.py @@ -15,21 +15,10 @@ import numpy as np import paddle.v2 as paddle import paddle.v2.fluid as fluid - -def _get_global_param_norm_(params_grads): - res = fluid.layers.fill_constant(shape=[1], dtype="float32", value=0.0) - for _, grad in params_grads: - norm_var = fluid.layers.reduce_sum( - input=fluid.layers.pow(x=grad, factor=2.0)) - fluid.layers.sums(input=[norm_var, res], out=[res]) - fluid.layers.sqrt(x=res, out=res) - return res - - BATCH_SIZE = 128 -CLIP = 0.5 -prog = fluid.framework.Program() +CLIP = 1 +prog = fluid.framework.Program() with fluid.program_guard(main_program=prog): image = fluid.layers.data(name='x', shape=[784], dtype='float32') @@ -49,13 +38,12 @@ avg_cost_clip = prog_clip.block(0).var(avg_cost.name) p_g = fluid.backward.append_backward(loss=avg_cost) p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip) -with fluid.program_guard(main_program=prog): - gloabl_norm = _get_global_param_norm_(p_g) - with fluid.program_guard(main_program=prog_clip): fluid.clip.gradient_clip_by_global_norm(clip_norm=CLIP) p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip) - gloabl_norm_clip = _get_global_param_norm_(p_g_clip) + +grad_list = [elem[1] for elem in p_g] +grad_clip_list = [elem[1] for elem in p_g_clip] train_reader = paddle.batch( paddle.reader.shuffle( @@ -72,11 +60,21 @@ for data in train_reader(): count += 1 if count > 5: break - out, = exe.run(prog, feed=feeder.feed(data), fetch_list=[gloabl_norm]) - out_clip, = exe.run(prog_clip, - feed=feeder.feed(data), - fetch_list=[gloabl_norm_clip]) - - if not np.allclose(out_clip, np.minimum(out, np.array([CLIP]))): + out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list) + out_clip = exe.run(prog_clip, + feed=feeder.feed(data), + fetch_list=grad_clip_list) + global_norm = 0 + for v in out[1:]: + global_norm += np.sum(np.power(v, 2)) + global_norm = np.sqrt(global_norm) + + global_norm_clip = 0 + for v in out_clip[1:]: + global_norm_clip += np.sum(np.power(v, 2)) + global_norm_clip = np.sqrt(global_norm_clip) + + if not np.isclose( + a=global_norm_clip, b=np.minimum(global_norm, CLIP), rtol=5e-3): exit(1) exit(0) From 2f344e7f0f85f65ce0f5867ac5985d5576d07395 Mon Sep 17 00:00:00 2001 From: ying Date: Mon, 22 Jan 2018 10:22:34 +0800 Subject: [PATCH 17/22] fix name convention. 
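Helpers such as __read_to_dict__ and __load_dict__ were spelled with both
leading and trailing double underscores, the "dunder" form that PEP 8
reserves for Python's own protocol names (__init__, __len__, ...) and asks
users never to invent. Module-private helpers keep leading underscores
only, so the trailing pair is dropped. A minimal sketch of the convention
(the identifiers below are illustrative, not taken from this patch):

    def __load_helper(path):
        # leading underscores only: a module-private helper
        pass

    class Reader(object):
        def __len__(self):
            # leading AND trailing double underscores are reserved for
            # interpreter-defined protocol methods, never for helpers
            return 0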
--- python/paddle/v2/dataset/wmt14.py | 12 ++++++------ python/paddle/v2/dataset/wmt16.py | 32 +++++++++++++++---------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py index 1e54a4999b..5104e29051 100644 --- a/python/paddle/v2/dataset/wmt14.py +++ b/python/paddle/v2/dataset/wmt14.py @@ -50,8 +50,8 @@ UNK = "" UNK_IDX = 2 -def __read_to_dict__(tar_file, dict_size): - def __to_dict__(fd, size): +def __read_to_dict(tar_file, dict_size): + def __to_dict(fd, size): out_dict = dict() for line_count, line in enumerate(fd): if line_count < size: @@ -66,19 +66,19 @@ def __read_to_dict__(tar_file, dict_size): if each_item.name.endswith("src.dict") ] assert len(names) == 1 - src_dict = __to_dict__(f.extractfile(names[0]), dict_size) + src_dict = __to_dict(f.extractfile(names[0]), dict_size) names = [ each_item.name for each_item in f if each_item.name.endswith("trg.dict") ] assert len(names) == 1 - trg_dict = __to_dict__(f.extractfile(names[0]), dict_size) + trg_dict = __to_dict(f.extractfile(names[0]), dict_size) return src_dict, trg_dict def reader_creator(tar_file, file_name, dict_size): def reader(): - src_dict, trg_dict = __read_to_dict__(tar_file, dict_size) + src_dict, trg_dict = __read_to_dict(tar_file, dict_size) with tarfile.open(tar_file, mode='r') as f: names = [ each_item.name for each_item in f @@ -160,7 +160,7 @@ def get_dict(dict_size, reverse=True): # if reverse = False, return dict = {'a':'001', 'b':'002', ...} # else reverse = true, return dict = {'001':'a', '002':'b', ...} tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) - src_dict, trg_dict = __read_to_dict__(tar_file, dict_size) + src_dict, trg_dict = __read_to_dict(tar_file, dict_size) if reverse: src_dict = {v: k for k, v in src_dict.items()} trg_dict = {v: k for k, v in trg_dict.items()} diff --git a/python/paddle/v2/dataset/wmt16.py b/python/paddle/v2/dataset/wmt16.py index a1899f20b5..bbc28a2da9 100644 --- a/python/paddle/v2/dataset/wmt16.py +++ b/python/paddle/v2/dataset/wmt16.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -ACL2016 Multimodal Machine Translation. Please see this websit for more details: -http://www.statmt.org/wmt16/multimodal-task.html#task1 +ACL2016 Multimodal Machine Translation. Please see this website for more +details: http://www.statmt.org/wmt16/multimodal-task.html#task1 If you use the dataset created for your task, please cite the following paper: Multi30K: Multilingual English-German Image Descriptions. 
@@ -56,7 +56,7 @@ END_MARK = "" UNK_MARK = "" -def __build_dict__(tar_file, dict_size, save_path, lang): +def __build_dict(tar_file, dict_size, save_path, lang): word_dict = defaultdict(int) with tarfile.open(tar_file, mode="r") as f: for line in f.extractfile("wmt16/train"): @@ -75,12 +75,12 @@ def __build_dict__(tar_file, dict_size, save_path, lang): fout.write("%s\n" % (word[0])) -def __load_dict__(tar_file, dict_size, lang, reverse=False): +def __load_dict(tar_file, dict_size, lang, reverse=False): dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16/%s_%d.dict" % (lang, dict_size)) if not os.path.exists(dict_path) or ( len(open(dict_path, "r").readlines()) != dict_size): - __build_dict__(tar_file, dict_size, dict_path, lang) + __build_dict(tar_file, dict_size, dict_path, lang) word_dict = {} with open(dict_path, "r") as fdict: @@ -92,7 +92,7 @@ def __load_dict__(tar_file, dict_size, lang, reverse=False): return word_dict -def __get_dict_size__(src_dict_size, trg_dict_size, src_lang): +def __get_dict_size(src_dict_size, trg_dict_size, src_lang): src_dict_size = min(src_dict_size, (TOTAL_EN_WORDS if src_lang == "en" else TOTAL_DE_WORDS)) trg_dict_size = min(trg_dict_size, (TOTAL_DE_WORDS if src_lang == "en" else @@ -102,9 +102,9 @@ def __get_dict_size__(src_dict_size, trg_dict_size, src_lang): def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang): def reader(): - src_dict = __load_dict__(tar_file, src_dict_size, src_lang) - trg_dict = __load_dict__(tar_file, trg_dict_size, - ("de" if src_lang == "en" else "en")) + src_dict = __load_dict(tar_file, src_dict_size, src_lang) + trg_dict = __load_dict(tar_file, trg_dict_size, + ("de" if src_lang == "en" else "en")) # the indice for start mark, end mark, and unk are the same in source # language and target language. Here uses the source language @@ -173,8 +173,8 @@ def train(src_dict_size, trg_dict_size, src_lang="en"): assert (src_lang in ["en", "de"], ("An error language type. Only support: " "en (for English); de(for Germany)")) - src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size, - trg_dict_size, src_lang) + src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, + src_lang) return reader_creator( tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, @@ -222,8 +222,8 @@ def test(src_dict_size, trg_dict_size, src_lang="en"): ("An error language type. " "Only support: en (for English); de(for Germany)")) - src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size, - trg_dict_size, src_lang) + src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, + src_lang) return reader_creator( tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, @@ -269,8 +269,8 @@ def validation(src_dict_size, trg_dict_size, src_lang="en"): assert (src_lang in ["en", "de"], ("An error language type. 
" "Only support: en (for English); de(for Germany)")) - src_dict_size, trg_dict_size = __get_dict_size__(src_dict_size, - trg_dict_size, src_lang) + src_dict_size, trg_dict_size = __get_dict_size(src_dict_size, trg_dict_size, + src_lang) return reader_creator( tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, @@ -308,7 +308,7 @@ def get_dict(lang, dict_size, reverse=False): "Please invoke paddle.dataset.wmt16.train/test/validation " "first to build the dictionary.") tar_file = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16.tar.gz") - return __load_dict__(tar_file, dict_size, lang, reverse) + return __load_dict(tar_file, dict_size, lang, reverse) def fetch(): From 2024489bb85175ff77e27910b50f032e0b485325 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Mon, 22 Jan 2018 11:59:23 +0800 Subject: [PATCH 18/22] Fix CI --- paddle/operators/compare_op.h | 2 +- paddle/operators/elementwise_op_function.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/operators/compare_op.h b/paddle/operators/compare_op.h index 15e9cfcaab..9c655d6c0d 100644 --- a/paddle/operators/compare_op.h +++ b/paddle/operators/compare_op.h @@ -54,7 +54,7 @@ class CompareOpKernel public: void Compute(const framework::ExecutionContext& context) const override { using T = typename Functor::ELEM_TYPE; - ElementwiseComputeEx(context); + ElementwiseComputeEx(context); } }; diff --git a/paddle/operators/elementwise_op_function.h b/paddle/operators/elementwise_op_function.h index e6f3e39ece..d749b8e875 100644 --- a/paddle/operators/elementwise_op_function.h +++ b/paddle/operators/elementwise_op_function.h @@ -373,7 +373,7 @@ void ElementwiseComputeEx(const framework::ExecutionContext& ctx) { auto* x = ctx.Input("X"); auto* y = ctx.Input("Y"); auto* z = ctx.Output("Out"); - z->mutable_data(ctx.GetPlace()); + z->mutable_data(ctx.GetPlace()); TransformFunctor functor( x, y, z, ctx.template device_context(), Functor()); From d3d855fbc81149bea6050c3518b041583999424f Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Mon, 22 Jan 2018 12:22:27 +0800 Subject: [PATCH 19/22] Fix unittest --- python/paddle/v2/fluid/tests/test_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/fluid/tests/test_registry.py b/python/paddle/v2/fluid/tests/test_registry.py index 4fddb91268..44e50ca55a 100644 --- a/python/paddle/v2/fluid/tests/test_registry.py +++ b/python/paddle/v2/fluid/tests/test_registry.py @@ -28,4 +28,4 @@ class TestRegistry(unittest.TestCase): exe = fluid.Executor(place) X = np.random.random((10, 10)).astype("float32") mean_out = exe.run(feed={"X": X}, fetch_list=[output]) - self.assertAlmostEqual(np.mean(X), mean_out) + self.assertAlmostEqual(np.mean(X), mean_out[0]) From 5c26f60875a3ce9fcc86c74dcf6884aa4a01c966 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Mon, 22 Jan 2018 12:33:14 +0800 Subject: [PATCH 20/22] Fix license --- python/paddle/v2/fluid/layers/control_flow.py | 10 +++++----- python/paddle/v2/fluid/layers/ops.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py index 67c3b2e9ac..a7fc4f6539 100644 --- a/python/paddle/v2/fluid/layers/control_flow.py +++ b/python/paddle/v2/fluid/layers/control_flow.py @@ -6,11 +6,11 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES 
OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import contextlib from layer_function_generator import autodoc diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index 603de1f600..19dc0fdeee 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -6,11 +6,11 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from layer_function_generator import generate_layer_fn __activations__ = [ From e8adcaf27855e452dea5b2deaddb830363cd3964 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 22 Jan 2018 12:50:45 +0800 Subject: [PATCH 21/22] update --- python/paddle/v2/fluid/clip.py | 1 + python/paddle/v2/fluid/layers/ops.py | 1 + python/paddle/v2/fluid/param_attr.py | 1 + python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py | 1 + python/paddle/v2/fluid/tests/test_gradient_clip.py | 1 + 5 files changed, 5 insertions(+) diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py index 777a39e105..386df9823d 100644 --- a/python/paddle/v2/fluid/clip.py +++ b/python/paddle/v2/fluid/clip.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import functools import layers import framework diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index 7e52dc4c34..d296076162 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from ..registry import register_layer __activations__ = [ diff --git a/python/paddle/v2/fluid/param_attr.py b/python/paddle/v2/fluid/param_attr.py index 17fcb262ef..dcca8b6c54 100644 --- a/python/paddle/v2/fluid/param_attr.py +++ b/python/paddle/v2/fluid/param_attr.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ from initializer import Initializer, Xavier, Constant from regularizer import WeightDecayRegularizer diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index 2fde3707da..8776a65bf8 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from __future__ import print_function import numpy as np import paddle.v2 as paddle diff --git a/python/paddle/v2/fluid/tests/test_gradient_clip.py b/python/paddle/v2/fluid/tests/test_gradient_clip.py index 75c5fd9892..4e6e6a1ef6 100644 --- a/python/paddle/v2/fluid/tests/test_gradient_clip.py +++ b/python/paddle/v2/fluid/tests/test_gradient_clip.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import numpy as np import paddle.v2 as paddle import paddle.v2.fluid as fluid From a6da470b11dd803045dd1be6c99069a2def48198 Mon Sep 17 00:00:00 2001 From: QI JUN Date: Mon, 22 Jan 2018 14:21:44 +0800 Subject: [PATCH 22/22] add memory optimization transpiler demo (#7443) * add memory optimization transpiler demo * add memory benchmark compile option * add gflags instead of macro * refine code --- paddle/framework/executor.cc | 11 ++ paddle/framework/scope.cc | 12 +- python/paddle/v2/fluid/__init__.py | 4 +- python/paddle/v2/fluid/tests/CMakeLists.txt | 1 + .../book_memory_optimization/CMakeLists.txt | 11 ++ .../test_memopt_fit_a_line.py | 44 ++++++ .../test_memopt_image_classification_train.py | 133 ++++++++++++++++++ 7 files changed, 213 insertions(+), 3 deletions(-) create mode 100644 python/paddle/v2/fluid/tests/book_memory_optimization/CMakeLists.txt create mode 100644 python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py create mode 100644 python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index c0418c9266..1382bfca19 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -23,6 +23,7 @@ limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/platform/place.h" +DECLARE_bool(do_memory_benchmark); DEFINE_bool(check_nan_inf, false, "Checking whether operator produce NAN/INF or not. 
It will be " "extremely slow so please use this flag wisely."); @@ -117,6 +118,10 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); VLOG(3) << op->DebugStringEx(local_scope); op->Run(*local_scope, place_); + if (FLAGS_do_memory_benchmark) { + VLOG(2) << "Memory used after operator " + op->Type() + " running: " + << memory::memory_usage(place_); + } if (FLAGS_check_nan_inf) { for (auto& vname : op->OutputVars(true)) { auto* var = local_scope->FindVar(vname); @@ -130,6 +135,12 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, if (create_vars && create_local_scope) { scope->DeleteScope(local_scope); } + if (FLAGS_do_memory_benchmark) { + VLOG(2) << "-------------------------------------------------------"; + VLOG(2) << "Memory used after deleting local scope: " + << memory::memory_usage(place_); + VLOG(2) << "-------------------------------------------------------"; + } } } // namespace framework diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 2bd0ac8f5a..a67ff91009 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -20,6 +20,10 @@ limitations under the License. */ #include "paddle/framework/threadpool.h" #include "paddle/string/printf.h" +DEFINE_bool(do_memory_benchmark, false, + "Doing memory benchmark. It will make deleting scope synchronized, " + "and add some memory usage logs"); + namespace paddle { namespace framework { @@ -88,8 +92,12 @@ void Scope::DeleteScope(Scope* scope) { auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); this->kids_.erase(it); - // Make delete async. - Async([scope] { delete scope; }); + // When making memory benchmark on Fluid, we have to delete scope sync. 
+ if (FLAGS_do_memory_benchmark) { + delete scope; + } else { + Async([scope] { delete scope; }); + } } void Scope::Rename(const std::string& origin_name, diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py index e91eaa4f35..1f041c7459 100644 --- a/python/paddle/v2/fluid/__init__.py +++ b/python/paddle/v2/fluid/__init__.py @@ -86,7 +86,9 @@ def __bootstrap__(): os.environ['OMP_NUM_THREADS'] = str(num_threads) - read_env_flags = ['use_pinned_memory', 'check_nan_inf'] + read_env_flags = [ + 'use_pinned_memory', 'check_nan_inf', 'do_memory_benchmark' + ] if core.is_compile_gpu(): read_env_flags += ['fraction_of_gpu_memory_to_use', 'op_sync'] core.init_gflags([sys.argv[0]] + diff --git a/python/paddle/v2/fluid/tests/CMakeLists.txt b/python/paddle/v2/fluid/tests/CMakeLists.txt index 9a0240cbf6..8305316082 100644 --- a/python/paddle/v2/fluid/tests/CMakeLists.txt +++ b/python/paddle/v2/fluid/tests/CMakeLists.txt @@ -6,3 +6,4 @@ endforeach() add_subdirectory(book) add_subdirectory(book_distribute) +add_subdirectory(book_memory_optimization) diff --git a/python/paddle/v2/fluid/tests/book_memory_optimization/CMakeLists.txt b/python/paddle/v2/fluid/tests/book_memory_optimization/CMakeLists.txt new file mode 100644 index 0000000000..213af5d27f --- /dev/null +++ b/python/paddle/v2/fluid/tests/book_memory_optimization/CMakeLists.txt @@ -0,0 +1,11 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +list(REMOVE_ITEM TEST_OPS test_memopt_image_classification_train) +py_test(test_memopt_image_classification_train_resnet SRCS test_memopt_image_classification_train.py ARGS resnet) +py_test(test_memopt_image_classification_train_vgg SRCS test_memopt_image_classification_train.py ARGS vgg) + +# default test +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() diff --git a/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py new file mode 100644 index 0000000000..6206fcc4be --- /dev/null +++ b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py @@ -0,0 +1,44 @@ +import numpy as np +import paddle.v2 as paddle +import paddle.v2.fluid as fluid + +x = fluid.layers.data(name='x', shape=[13], dtype='float32') + +y_predict = fluid.layers.fc(input=x, size=1, act=None) + +y = fluid.layers.data(name='y', shape=[1], dtype='float32') + +cost = fluid.layers.square_error_cost(input=y_predict, label=y) +avg_cost = fluid.layers.mean(x=cost) + +sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) +sgd_optimizer.minimize(avg_cost) + +# memopt_program = fluid.default_main_program() +memopt_program = fluid.memory_optimize(fluid.default_main_program()) + +BATCH_SIZE = 200 + +train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.uci_housing.train(), buf_size=500), + batch_size=BATCH_SIZE) + +place = fluid.CPUPlace() +feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) +exe = fluid.Executor(place) + +exe.run(fluid.default_startup_program()) + +PASS_NUM = 100 +for pass_id in range(PASS_NUM): + fluid.io.save_persistables(exe, "./fit_a_line.model/") + fluid.io.load_persistables(exe, "./fit_a_line.model/") + for data in train_reader(): + avg_loss_value, = exe.run(memopt_program, + feed=feeder.feed(data), + fetch_list=[avg_cost]) + + if avg_loss_value[0] < 10.0: + exit(0) # if avg cost less than 10.0, we think our code is good. 
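+# Falling through both loops means the loss never dropped below the
+# threshold within PASS_NUM passes, so the demo reports failure.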
+exit(1) diff --git a/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py new file mode 100644 index 0000000000..cc37f773c4 --- /dev/null +++ b/python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py @@ -0,0 +1,133 @@ +from __future__ import print_function + +import sys + +import paddle.v2 as paddle +import paddle.v2.fluid as fluid + + +def resnet_cifar10(input, depth=32): + def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'): + tmp = fluid.layers.conv2d( + input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=False) + return fluid.layers.batch_norm(input=tmp, act=act) + + def shortcut(input, ch_in, ch_out, stride): + if ch_in != ch_out: + return conv_bn_layer(input, ch_out, 1, stride, 0, None) + else: + return input + + def basicblock(input, ch_in, ch_out, stride): + tmp = conv_bn_layer(input, ch_out, 3, stride, 1) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None) + short = shortcut(input, ch_in, ch_out, stride) + return fluid.layers.elementwise_add(x=tmp, y=short, act='relu') + + def layer_warp(block_func, input, ch_in, ch_out, count, stride): + tmp = block_func(input, ch_in, ch_out, stride) + for i in range(1, count): + tmp = block_func(tmp, ch_out, ch_out, 1) + return tmp + + assert (depth - 2) % 6 == 0 + n = (depth - 2) / 6 + conv1 = conv_bn_layer( + input=input, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) + res2 = layer_warp(basicblock, res1, 16, 32, n, 2) + res3 = layer_warp(basicblock, res2, 32, 64, n, 2) + pool = fluid.layers.pool2d( + input=res3, pool_size=8, pool_type='avg', pool_stride=1) + return pool + + +def vgg16_bn_drop(input): + def conv_block(input, num_filter, groups, dropouts): + return fluid.nets.img_conv_group( + input=input, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act='relu', + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type='max') + + conv1 = conv_block(input, 64, 2, [0.3, 0]) + conv2 = conv_block(conv1, 128, 2, [0.4, 0]) + conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) + conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) + conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) + + drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5) + fc1 = fluid.layers.fc(input=drop, size=512, act=None) + bn = fluid.layers.batch_norm(input=fc1, act='relu') + drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5) + fc2 = fluid.layers.fc(input=drop2, size=512, act=None) + return fc2 + + +classdim = 10 +data_shape = [3, 32, 32] + +images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') +label = fluid.layers.data(name='label', shape=[1], dtype='int64') + +net_type = "vgg" +if len(sys.argv) >= 2: + net_type = sys.argv[1] + +if net_type == "vgg": + print("train vgg net") + net = vgg16_bn_drop(images) +elif net_type == "resnet": + print("train resnet") + net = resnet_cifar10(images, 32) +else: + raise ValueError("%s network is not supported" % net_type) + +predict = fluid.layers.fc(input=net, size=classdim, act='softmax') +cost = fluid.layers.cross_entropy(input=predict, label=label) +avg_cost = fluid.layers.mean(x=cost) + +optimizer = fluid.optimizer.Adam(learning_rate=0.001) +opts = optimizer.minimize(avg_cost) + +accuracy = 
fluid.evaluator.Accuracy(input=predict, label=label) + +# memopt_program = fluid.default_main_program() +memopt_program = fluid.memory_optimize(fluid.default_main_program()) + +BATCH_SIZE = 128 +PASS_NUM = 1 + +train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + +place = fluid.CPUPlace() +exe = fluid.Executor(place) +feeder = fluid.DataFeeder(place=place, feed_list=[images, label]) +exe.run(fluid.default_startup_program()) + +for pass_id in range(PASS_NUM): + accuracy.reset(exe) + for data in train_reader(): + loss, acc = exe.run(memopt_program, + feed=feeder.feed(data), + fetch_list=[avg_cost] + accuracy.metrics) + pass_acc = accuracy.eval(exe) + print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str( + pass_acc)) + # this model is slow, so if we can train two mini-batches, we think it works properly. + exit(0) +exit(1)
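A minimal usage sketch for the new flag follows. The flag name and the
VLOG(2) log level come from the diffs above; the assumption is that, as
with Fluid's other bootstrap flags, gflags picks it up from a
FLAGS_-prefixed environment variable, which must therefore be set before
paddle.v2.fluid is first imported:

    import os

    # Both settings must precede the fluid import, because __bootstrap__()
    # consumes them when the module is first loaded.
    os.environ["FLAGS_do_memory_benchmark"] = "true"
    os.environ["GLOG_v"] = "2"  # per-operator memory logs use VLOG(2)

    import paddle.v2.fluid as fluid  # flags take effect here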