From 67308822f85a387433867ea330624b9c16ae029c Mon Sep 17 00:00:00 2001
From: minqiyang
Date: Sun, 30 Sep 2018 19:45:05 +0800
Subject: [PATCH 1/4] Add selected_rows merge for clip_by_norm op

test=develop
---
 paddle/fluid/operators/CMakeLists.txt    |  3 ++-
 paddle/fluid/operators/clip_by_norm_op.h | 24 +++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index b61bca8c3d..e10fc422fa 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -229,7 +229,7 @@ if(WITH_DISTRIBUTE)
     op_library(${dist_op} DEPS ${DISTRIBUTE_DEPS})
     set_source_files_properties(${dist_op}.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
   endforeach()
-
+
   #set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
   #cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op
   #        listen_and_serv_op sum_op executor SERIAL)
@@ -267,6 +267,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
 else()
     set(DEPS_OPS ${DEPS_OPS} tensorrt_engine_op)
 endif()
+op_library(clip_by_norm_op DEPS selected_rows_functor)
 op_library(sum_op DEPS selected_rows_functor)
 op_library(sgd_op DEPS selected_rows_functor)
 op_library(print_op DEPS lod_tensor)
diff --git a/paddle/fluid/operators/clip_by_norm_op.h b/paddle/fluid/operators/clip_by_norm_op.h
index 5af0eb0b2a..8346115913 100644
--- a/paddle/fluid/operators/clip_by_norm_op.h
+++ b/paddle/fluid/operators/clip_by_norm_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/transform.h"
 
 namespace paddle {
@@ -31,10 +32,31 @@ class ClipByNormKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto max_norm = context.Attr<T>("max_norm");
-    auto* input = context.Input<Tensor>("X");
+    auto in_var = context.InputVar("X");
     auto* output = context.Output<Tensor>("Out");
     output->mutable_data<T>(context.GetPlace());
 
+    const Tensor* input = nullptr;
+    if (in_var->IsType<framework::LoDTensor>()) {
+      input = context.Input<Tensor>("X");
+    } else if (in_var->IsType<framework::SelectedRows>()) {
+      auto* x = context.Input<framework::SelectedRows>("X");
+
+      // merge ids in selected rows first
+      math::scatter::MergeAdd<DeviceContext, T> merge_func;
+      auto* merged_input = const_cast<framework::Scope&>(context.scope())
+                               .Var()
+                               ->GetMutable<framework::SelectedRows>();
+      merge_func(context.template device_context<DeviceContext>(), *x,
+                 merged_input);
+      input = &(merged_input->value());
+    } else {
+      PADDLE_THROW("Unexpected branch, input variable type is %s",
+                   in_var->Type().name());
+    }
+
+    PADDLE_ENFORCE_NOT_NULL(input);
+
     auto x = EigenVector<T>::Flatten(*input);
     auto out = EigenVector<T>::Flatten(*output);
     auto x_norm = x.square().sum().sqrt();
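The SelectedRows branch added above first sums the value slices of duplicate row ids (MergeAdd) and only then clips by the global L2 norm. A minimal numpy sketch of that semantics, assuming the merged rows come back in ascending id order (the helper name is illustrative, not a Paddle API):

    import numpy as np

    def clip_by_norm_selected_rows(rows, values, max_norm):
        # MergeAdd: sum the value slices that share a row id
        merged = {}
        for r, v in zip(rows, values):
            merged[r] = merged.get(r, 0) + v
        merged_rows = sorted(merged)
        y = np.stack([merged[r] for r in merged_rows])
        # clip_by_norm: rescale only if the global L2 norm exceeds max_norm
        norm = np.sqrt(np.square(y).sum())
        if norm > max_norm:
            y = y * (max_norm / norm)
        return merged_rows, y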
From f20fc955395a907e68136dd4fccce29660f5d140 Mon Sep 17 00:00:00 2001
From: minqiyang
Date: Sat, 6 Oct 2018 20:18:09 +0800
Subject: [PATCH 2/4] Resize output ddims and rows

---
 paddle/fluid/operators/clip_by_norm_op.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/operators/clip_by_norm_op.h b/paddle/fluid/operators/clip_by_norm_op.h
index 8346115913..7144524a4c 100644
--- a/paddle/fluid/operators/clip_by_norm_op.h
+++ b/paddle/fluid/operators/clip_by_norm_op.h
@@ -33,12 +33,14 @@ class ClipByNormKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& context) const override {
     auto max_norm = context.Attr<T>("max_norm");
     auto in_var = context.InputVar("X");
-    auto* output = context.Output<Tensor>("Out");
-    output->mutable_data<T>(context.GetPlace());
 
+    Tensor* output = nullptr;
     const Tensor* input = nullptr;
     if (in_var->IsType<framework::LoDTensor>()) {
       input = context.Input<Tensor>("X");
+
+      output = context.Output<Tensor>("Out");
+      output->mutable_data<T>(context.GetPlace());
     } else if (in_var->IsType<framework::SelectedRows>()) {
       auto* x = context.Input<framework::SelectedRows>("X");
 
@@ -50,6 +52,11 @@ class ClipByNormKernel : public framework::OpKernel<T> {
       merge_func(context.template device_context<DeviceContext>(), *x,
                  merged_input);
       input = &(merged_input->value());
+
+      auto* output_selected_rows = context.Output<framework::SelectedRows>("Out");
+      output_selected_rows->set_rows(merged_input.rows());
+      output = output_selected_rows->mutable_data();
+      output->Resize(framework::make_ddim(merged_input.value().dims()));
     } else {
       PADDLE_THROW("Unexpected branch, input variable type is %s",
                    in_var->Type().name());
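The resize matters because merging can shrink the leading dimension: with duplicate ids, the merged value tensor has fewer rows than the input, so the output SelectedRows has to adopt the merged rows and dims rather than the input's. A small plain-numpy illustration of the bookkeeping this patch performs:

    import numpy as np

    in_rows = [1, 1, 2, 0]                    # 4 slices, only 3 distinct ids
    in_value = np.random.random((4, 1)).astype(np.float32)

    merged_rows = sorted(set(in_rows))        # [0, 1, 2]
    merged_shape = (len(merged_rows),) + in_value.shape[1:]   # (3, 1)
    # hence set_rows(merged rows) and Resize(merged value dims) above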
From bcd8c2ccc35f48a6563715562f525d30ac498e6f Mon Sep 17 00:00:00 2001
From: minqiyang
Date: Mon, 8 Oct 2018 15:51:36 +0800
Subject: [PATCH 3/4] Add unit test

---
 paddle/fluid/operators/CMakeLists.txt              |  2 +-
 paddle/fluid/operators/clip_by_norm_op.h           | 22 ++++++-----
 .../tests/unittests/test_clip_by_norm_op.py        | 38 +++++++++++++++++++
 3 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index e10fc422fa..cafd7b11ae 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -267,7 +267,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
 else()
     set(DEPS_OPS ${DEPS_OPS} tensorrt_engine_op)
 endif()
-op_library(clip_by_norm_op DEPS selected_rows_functor)
+op_library(clip_by_norm_op DEPS selected_rows_functor selected_rows)
 op_library(sum_op DEPS selected_rows_functor)
 op_library(sgd_op DEPS selected_rows_functor)
 op_library(print_op DEPS lod_tensor)
diff --git a/paddle/fluid/operators/clip_by_norm_op.h b/paddle/fluid/operators/clip_by_norm_op.h
index 7144524a4c..9f99c8a3f9 100644
--- a/paddle/fluid/operators/clip_by_norm_op.h
+++ b/paddle/fluid/operators/clip_by_norm_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/transform.h"
 
@@ -23,6 +24,7 @@ namespace paddle {
 namespace operators {
 
 using Tensor = framework::Tensor;
+using SelectedRows = framework::SelectedRows;
 template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
 using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
@@ -41,22 +43,24 @@ class ClipByNormKernel : public framework::OpKernel<T> {
 
       output = context.Output<Tensor>("Out");
       output->mutable_data<T>(context.GetPlace());
-    } else if (in_var->IsType<framework::SelectedRows>()) {
-      auto* x = context.Input<framework::SelectedRows>("X");
+    } else if (in_var->IsType<SelectedRows>()) {
+      auto* x = context.Input<SelectedRows>("X");
 
       // merge ids in selected rows first
       math::scatter::MergeAdd<DeviceContext, T> merge_func;
-      auto* merged_input = const_cast<framework::Scope&>(context.scope())
-                               .Var()
-                               ->GetMutable<framework::SelectedRows>();
+      SelectedRows* merged_input =
+          const_cast<framework::Scope&>(context.scope())
+              .Var()
+              ->GetMutable<SelectedRows>();
       merge_func(context.template device_context<DeviceContext>(), *x,
                  merged_input);
       input = &(merged_input->value());
 
-      auto* output_selected_rows = context.Output<framework::SelectedRows>("Out");
-      output_selected_rows->set_rows(merged_input.rows());
-      output = output_selected_rows->mutable_data();
-      output->Resize(framework::make_ddim(merged_input.value().dims()));
+      SelectedRows* output_selected_rows = context.Output<SelectedRows>("Out");
+      output_selected_rows->set_rows(merged_input->rows());
+      output_selected_rows->set_height(merged_input->height());
+      output = output_selected_rows->mutable_value();
+      output->Resize(merged_input->value().dims());
     } else {
       PADDLE_THROW("Unexpected branch, input variable type is %s",
                    in_var->Type().name());
diff --git a/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py b/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
index 6103c3aafc..6556c0875e 100644
--- a/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
@@ -18,6 +18,8 @@ import unittest
 import numpy as np
 from op_test import OpTest
 
+import paddle.fluid.core as core
+
 
 class TestClipByNormOp(OpTest):
     def setUp(self):
@@ -62,5 +64,41 @@ class TestCase3(TestClipByNormOp):
         self.max_norm = 1.0
 
 
+class TestClipByNormOpWithSelectedRows(OpTest):
+    def setUp(self):
+        self.initTestCase()
+
+        self.max_relative_error = 0.006
+
+        scope = core.Scope()
+        x_selected_rows = scope.var('X').get_selected_rows()
+        x_selected_rows.set_rows([1, 1, 2, 0])
+        x_tensor = x_selected_rows.get_tensor()
+        x_tensor = np.random.random((4, 1)).astype("float32")
+        x_tensor[np.abs(x_tensor) < self.max_relative_error] = 0.5
+
+        self.op_type = "clip_by_norm"
+        self.inputs = {'X': x_selected_rows, }
+        self.attrs = {}
+        self.attrs['max_norm'] = self.max_norm
+        y_tensor = np.zeros((3, 1))
+        y_tensor[0::1] = np.sum(x_tensor[0::1], x_tensor[1::1])
+        y_tensor[1::1] = x_tensor[2::1]
+        y_tensor[2::1] = x_tensor[3::1]
+        norm = np.sqrt(np.sum(np.square(y_tensor)))
+        if norm > self.max_norm:
+            output = self.max_norm * y_tensor / norm
+        else:
+            output = y_tensor
+        self.outputs = {'Out': output}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def initTestCase(self):
+        self.shape = (100, )
+        self.max_norm = 1.0
+
+
 if __name__ == '__main__':
     unittest.main()
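For the input rows [1, 1, 2, 0] used in this first version of the test, the expected clipped output can be written out directly; assuming the merge emits ascending row ids [0, 1, 2], the reference computation is:

    import numpy as np

    x = np.random.random((4, 1)).astype(np.float32)

    y = np.zeros((3, 1), dtype=np.float32)
    y[0] = x[3]           # row id 0 occurs once, at slice 3
    y[1] = x[0] + x[1]    # row id 1 occurs twice and is summed
    y[2] = x[2]           # row id 2 occurs once, at slice 2

    max_norm = 1.0
    norm = np.sqrt(np.sum(np.square(y)))
    expected = max_norm * y / norm if norm > max_norm else y

Patch 4 below reworks the test around this reference, using already-sorted rows [0, 0, 1, 2] so the slice indices line up with the merged row ids.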
From 1456b8ec7dd7d1a13b7bf3e4d1c14e2a10fb0a38 Mon Sep 17 00:00:00 2001
From: minqiyang
Date: Wed, 10 Oct 2018 18:53:15 +0800
Subject: [PATCH 4/4] Add unittest for clip_by_norm_op with SelectedRows

test=develop
---
 paddle/fluid/operators/clip_by_norm_op.h           |  1 +
 .../tests/unittests/test_clip_by_norm_op.py        | 69 ++++++++++++-------
 2 files changed, 45 insertions(+), 25 deletions(-)

diff --git a/paddle/fluid/operators/clip_by_norm_op.h b/paddle/fluid/operators/clip_by_norm_op.h
index 9f99c8a3f9..855c4d7067 100644
--- a/paddle/fluid/operators/clip_by_norm_op.h
+++ b/paddle/fluid/operators/clip_by_norm_op.h
@@ -61,6 +61,7 @@ class ClipByNormKernel : public framework::OpKernel<T> {
       output_selected_rows->set_height(merged_input->height());
       output = output_selected_rows->mutable_value();
       output->Resize(merged_input->value().dims());
+      output->mutable_data<T>(context.GetPlace());
     } else {
       PADDLE_THROW("Unexpected branch, input variable type is %s",
                    in_var->Type().name());
diff --git a/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py b/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
index 6556c0875e..46433d7825 100644
--- a/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
@@ -18,6 +18,7 @@ import unittest
 import numpy as np
 from op_test import OpTest
 
+import paddle.fluid as fluid
 import paddle.fluid.core as core
 
 
@@ -65,39 +66,57 @@
 
 
 class TestClipByNormOpWithSelectedRows(OpTest):
-    def setUp(self):
-        self.initTestCase()
-
-        self.max_relative_error = 0.006
-
+    def check_with_place(self, place):
+        self.config_test_case()
         scope = core.Scope()
+
+        # set input
         x_selected_rows = scope.var('X').get_selected_rows()
-        x_selected_rows.set_rows([1, 1, 2, 0])
+        x_selected_rows.set_rows(self.grad_rows)
         x_tensor = x_selected_rows.get_tensor()
-        x_tensor = np.random.random((4, 1)).astype("float32")
-        x_tensor[np.abs(x_tensor) < self.max_relative_error] = 0.5
-
-        self.op_type = "clip_by_norm"
-        self.inputs = {'X': x_selected_rows, }
-        self.attrs = {}
-        self.attrs['max_norm'] = self.max_norm
-        y_tensor = np.zeros((3, 1))
-        y_tensor[0::1] = np.sum(x_tensor[0::1], x_tensor[1::1])
-        y_tensor[1::1] = x_tensor[2::1]
-        y_tensor[2::1] = x_tensor[3::1]
-        norm = np.sqrt(np.sum(np.square(y_tensor)))
+        x_np = np.random.random(self.grad_shape).astype("float32")
+        x_np[np.abs(x_np) < self.max_relative_error] = 0.5
+        x_tensor.set(x_np, place)
+
+        # set output
+        out_selected_rows = scope.var('Out').get_selected_rows()
+
+        # run clip_by_norm_op
+        clip_by_norm_op = fluid.op.Operator(
+            "clip_by_norm", max_norm=self.max_norm, X='X', Out='Out')
+        clip_by_norm_op.run(scope, place)
+
+        # check output
+        self.assertEqual(out_selected_rows.rows(), self.grad_clipped_rows)
+        out_tensor = out_selected_rows.get_tensor()
+        y_np = np.zeros(self.grad_clipped_shape)
+        y_np[0] = np.sum(x_np[0:2])
+        y_np[1] = x_np[2]
+        y_np[2] = x_np[3]
+        norm = np.sqrt(np.sum(np.square(y_np)))
         if norm > self.max_norm:
-            output = self.max_norm * y_tensor / norm
+            output = self.max_norm * y_np / norm
         else:
-            output = y_tensor
-        self.outputs = {'Out': output}
+            output = y_np
+        self.assertTrue(
+            np.allclose(
+                np.array(out_tensor), output, atol=1e-5, equal_nan=False))
 
-    def test_check_output(self):
-        self.check_output()
+    def test_clip_by_norm_with_selected_ros(self):
+        places = [core.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(core.CUDAPlace(0))
 
-    def initTestCase(self):
-        self.shape = (100, )
+        for place in places:
+            self.check_with_place(place)
+
+    def config_test_case(self):
         self.max_norm = 1.0
+        self.max_relative_error = 0.006
+        self.grad_shape = (4, 1)
+        self.grad_clipped_shape = (3, 1)
+        self.grad_rows = [0, 0, 1, 2]
+        self.grad_clipped_rows = [0, 1, 2]
 
 
 if __name__ == '__main__':
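For context, the scenario this SelectedRows path serves is norm-clipping of sparse gradients, such as those produced by a sparse embedding lookup. A sketch with the fluid 1.x Python API, assuming fluid.clip.GradientClipByNorm inserts clip_by_norm ops on each gradient, including SelectedRows ones:

    import paddle.fluid as fluid

    ids = fluid.layers.data(name='ids', shape=[1], dtype='int64')
    emb = fluid.layers.embedding(input=ids, size=[10000, 64], is_sparse=True)
    loss = fluid.layers.reduce_sum(emb)

    # is_sparse=True makes the embedding gradient a SelectedRows, which
    # clip_by_norm can consume after this patch series
    fluid.clip.set_gradient_clip(fluid.clip.GradientClipByNorm(clip_norm=1.0))
    fluid.optimizer.SGD(learning_rate=0.1).minimize(loss)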