From 58b5b08bba70be296f210cf27bc8696e34ea77f9 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Thu, 14 Sep 2017 17:06:42 -0700 Subject: [PATCH 01/13] prelu op --- paddle/operators/prelu_op.cc | 78 +++++++++++++++++++ paddle/operators/prelu_op.cu | 18 +++++ paddle/operators/prelu_op.h | 71 +++++++++++++++++ .../v2/framework/tests/test_prelu_op.py | 23 ++++++ 4 files changed, 190 insertions(+) create mode 100644 paddle/operators/prelu_op.cc create mode 100644 paddle/operators/prelu_op.cu create mode 100644 paddle/operators/prelu_op.h create mode 100644 python/paddle/v2/framework/tests/test_prelu_op.py diff --git a/paddle/operators/prelu_op.cc b/paddle/operators/prelu_op.cc new file mode 100644 index 0000000000..831958e3a4 --- /dev/null +++ b/paddle/operators/prelu_op.cc @@ -0,0 +1,78 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/prelu_op.h" +#include "paddle/operators/net_op.h" + +namespace paddle { +namespace operators { + +class PreluOp : public framework::OperatorWithKernel { + public: + PreluOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto *in = ctx.Input("X"); + auto *out = ctx.Output("Out"); + out->Resize(in->dims()); + } +}; + +template +class PreluOpMaker : public framework::OpProtoAndCheckerMaker { + public: + PreluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input tensor of prelu operator.").NotInGradient(); + AddOutput("Out", "The output tensor of prelu operator.").NotInGradient(); + AddComment(R"DOC(Prelu operator + +The equation is: +f(x) = alpha * x , for x < 0 +f(x) = x , for x >= 0 +)DOC"); + AddAttr("alpha", "The scaling factor alpha of prelu.") + .SetDefault(0.0); + } +}; + +// The operator to calculate gradients of a prelu operator. +class PreluGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto X_grad = ctx.Output(framework::GradVarName("X")); + auto X = ctx.Input("X"); + + X_grad->Resize(X->dims()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP(prelu, ops::PreluOp, ops::PreluOpMaker, prelu_grad, + ops::PreluGradOp); +REGISTER_OP_CPU_KERNEL(prelu, + ops::PreluKernel); +REGISTER_OP_CPU_KERNEL(prelu_grad, + ops::PreluGradKernel); diff --git a/paddle/operators/prelu_op.cu b/paddle/operators/prelu_op.cu new file mode 100644 index 0000000000..54a9089bdb --- /dev/null +++ b/paddle/operators/prelu_op.cu @@ -0,0 +1,18 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/prelu_op.h" + +REGISTER_OP_GPU_KERNEL( + prelu, paddle::operators::PreluKernel); diff --git a/paddle/operators/prelu_op.h b/paddle/operators/prelu_op.h new file mode 100644 index 0000000000..0bb6f61e30 --- /dev/null +++ b/paddle/operators/prelu_op.h @@ -0,0 +1,71 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + +template +class PreluKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* X = context.Input("X"); + auto* Out = context.Output("Out"); + + Out->mutable_data(context.GetPlace()); + + auto alpha = static_cast(context.Attr("alpha")); + + auto X_vec = EigenVector::Flatten(*X); + auto Out_vec = EigenVector::Flatten(*Out); + + auto place = context.GetEigenDevice(); + + Out_vec.device(place) = X_vec.cwiseMax(0.f) + X_vec.cwiseMin(0.f) * alpha; + } +}; + +template +class PreluGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* dX = context.Output(framework::GradVarName("X")); + auto* dO = context.Input(framework::GradVarName("Out")); + + auto* Out = context.Output("Out"); + + auto alpha = static_cast(context.Attr("alpha")); + + dX->mutable_data(context.GetPlace()); + + for (int i = 0; i < dX->numel(); ++i) { + if (Out->data()[i] > 0) { + dX->data()[i] = dO->data()[i]; + } else { + dX->data()[i] = dO->data()[i] * alpha; + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/v2/framework/tests/test_prelu_op.py b/python/paddle/v2/framework/tests/test_prelu_op.py new file mode 100644 index 0000000000..8b3916696a --- /dev/null +++ b/python/paddle/v2/framework/tests/test_prelu_op.py @@ -0,0 +1,23 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class ScaleTest(OpTest): + def setUp(self): + self.op_type = "prelu" + self.inputs = {'X': np.random.random((10, 10)).astype("float32")} + self.attrs = {'alpha': 0.1} + out_np = np.maximum(self.inputs['X'], 0.) + out_np = out_np + np.minimum(self.inputs['X'], 0.) 
* self.attrs['alpha'] + self.outputs = {'Out': self.inputs['X'] * self.attrs['scale']} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +if __name__ == "__main__": + unittest.main() From 260026fa678177e7f21390fd560422de5e1b046e Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Thu, 14 Sep 2017 17:23:47 -0700 Subject: [PATCH 02/13] prelu modify --- paddle/operators/prelu_op.cu | 3 +++ python/paddle/v2/framework/tests/test_prelu_op.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/paddle/operators/prelu_op.cu b/paddle/operators/prelu_op.cu index 54a9089bdb..314dcba375 100644 --- a/paddle/operators/prelu_op.cu +++ b/paddle/operators/prelu_op.cu @@ -16,3 +16,6 @@ REGISTER_OP_GPU_KERNEL( prelu, paddle::operators::PreluKernel); +REGISTER_OP_GPU_KERNEL( + prelu_grad, + paddle::operators::PreluGradKernel); diff --git a/python/paddle/v2/framework/tests/test_prelu_op.py b/python/paddle/v2/framework/tests/test_prelu_op.py index 8b3916696a..c207940d1f 100644 --- a/python/paddle/v2/framework/tests/test_prelu_op.py +++ b/python/paddle/v2/framework/tests/test_prelu_op.py @@ -3,7 +3,7 @@ import numpy as np from op_test import OpTest -class ScaleTest(OpTest): +class PreluTest(OpTest): def setUp(self): self.op_type = "prelu" self.inputs = {'X': np.random.random((10, 10)).astype("float32")} From 490ca5f1aeb5bfebd1a9ba4ac3e27518c979ef44 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Thu, 14 Sep 2017 22:31:12 -0700 Subject: [PATCH 03/13] prelu_op --- paddle/operators/prelu_op.cc | 16 +++++++------- paddle/operators/prelu_op.cu | 21 ------------------- paddle/operators/prelu_op.h | 17 +++++++-------- .../v2/framework/tests/test_prelu_op.py | 5 +++-- 4 files changed, 20 insertions(+), 39 deletions(-) delete mode 100644 paddle/operators/prelu_op.cu diff --git a/paddle/operators/prelu_op.cc b/paddle/operators/prelu_op.cc index 831958e3a4..030f320ab9 100644 --- a/paddle/operators/prelu_op.cc +++ b/paddle/operators/prelu_op.cc @@ -33,20 +33,20 @@ class PreluOp : public framework::OperatorWithKernel { } }; -template +// template class PreluOpMaker : public framework::OpProtoAndCheckerMaker { public: PreluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "The input tensor of prelu operator.").NotInGradient(); - AddOutput("Out", "The output tensor of prelu operator.").NotInGradient(); + AddInput("X", "The input tensor of prelu operator."); + AddOutput("Out", "The output tensor of prelu operator."); AddComment(R"DOC(Prelu operator The equation is: f(x) = alpha * x , for x < 0 f(x) = x , for x >= 0 )DOC"); - AddAttr("alpha", "The scaling factor alpha of prelu.") + AddAttr("alpha", "The scaling factor alpha of prelu.") .SetDefault(0.0); } }; @@ -58,8 +58,10 @@ class PreluGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto X_grad = ctx.Output(framework::GradVarName("X")); - auto X = ctx.Input("X"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null."); + auto *X_grad = + ctx.Output(framework::GradVarName("X")); + auto *X = ctx.Input("X"); X_grad->Resize(X->dims()); } @@ -70,7 +72,7 @@ class PreluGradOp : public framework::OperatorWithKernel { namespace ops = paddle::operators; -REGISTER_OP(prelu, ops::PreluOp, ops::PreluOpMaker, prelu_grad, +REGISTER_OP(prelu, ops::PreluOp, ops::PreluOpMaker, prelu_grad, ops::PreluGradOp); 
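// Editor's aside (illustrative, not part of this patch): at this point in the
// series the registered kernels compute, element-wise,
//   forward:  out[i] = x[i] >= 0 ? x[i] : alpha * x[i]
//   backward: dx[i]  = out[i] > 0 ? dout[i] : alpha * dout[i]
// with alpha still read from the "alpha" attribute; later patches in this
// series replace the attribute with a one-element "Alpha" input tensor.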
REGISTER_OP_CPU_KERNEL(prelu, ops::PreluKernel); diff --git a/paddle/operators/prelu_op.cu b/paddle/operators/prelu_op.cu deleted file mode 100644 index 314dcba375..0000000000 --- a/paddle/operators/prelu_op.cu +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/operators/prelu_op.h" - -REGISTER_OP_GPU_KERNEL( - prelu, paddle::operators::PreluKernel); -REGISTER_OP_GPU_KERNEL( - prelu_grad, - paddle::operators::PreluGradKernel); diff --git a/paddle/operators/prelu_op.h b/paddle/operators/prelu_op.h index 0bb6f61e30..a1e719e314 100644 --- a/paddle/operators/prelu_op.h +++ b/paddle/operators/prelu_op.h @@ -24,7 +24,7 @@ template using EigenVector = framework::EigenVector; -template +template class PreluKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { @@ -33,30 +33,29 @@ class PreluKernel : public framework::OpKernel { Out->mutable_data(context.GetPlace()); - auto alpha = static_cast(context.Attr("alpha")); + auto alpha = static_cast(context.Attr("alpha")); auto X_vec = EigenVector::Flatten(*X); auto Out_vec = EigenVector::Flatten(*Out); - auto place = context.GetEigenDevice(); - - Out_vec.device(place) = X_vec.cwiseMax(0.f) + X_vec.cwiseMin(0.f) * alpha; + // auto place = context.GetEigenDevice(); + // Out_vec.device(place) + Out_vec = X_vec.cwiseMax(0.f) + X_vec.cwiseMin(0.f) * alpha; } }; -template +template class PreluGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* dX = context.Output(framework::GradVarName("X")); auto* dO = context.Input(framework::GradVarName("Out")); - auto* Out = context.Output("Out"); + auto* Out = context.Input("Out"); - auto alpha = static_cast(context.Attr("alpha")); + auto alpha = static_cast(context.Attr("alpha")); dX->mutable_data(context.GetPlace()); - for (int i = 0; i < dX->numel(); ++i) { if (Out->data()[i] > 0) { dX->data()[i] = dO->data()[i]; diff --git a/python/paddle/v2/framework/tests/test_prelu_op.py b/python/paddle/v2/framework/tests/test_prelu_op.py index c207940d1f..39b6f673fd 100644 --- a/python/paddle/v2/framework/tests/test_prelu_op.py +++ b/python/paddle/v2/framework/tests/test_prelu_op.py @@ -6,11 +6,12 @@ from op_test import OpTest class PreluTest(OpTest): def setUp(self): self.op_type = "prelu" - self.inputs = {'X': np.random.random((10, 10)).astype("float32")} + self.inputs = {'X': np.random.normal(size=(3, 5)).astype("float32")} self.attrs = {'alpha': 0.1} out_np = np.maximum(self.inputs['X'], 0.) out_np = out_np + np.minimum(self.inputs['X'], 0.) 
* self.attrs['alpha'] - self.outputs = {'Out': self.inputs['X'] * self.attrs['scale']} + assert out_np is not self.inputs['X'] + self.outputs = {'Out': out_np} def test_check_output(self): self.check_output() From c7dfec11ef4cceaf3667fbb3e5ed3d8eca1d25bc Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 15 Sep 2017 12:01:16 -0700 Subject: [PATCH 04/13] fix --- paddle/operators/prelu_op.cc | 20 ++++++++++---------- paddle/operators/prelu_op.h | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/paddle/operators/prelu_op.cc b/paddle/operators/prelu_op.cc index 030f320ab9..eafd66579f 100644 --- a/paddle/operators/prelu_op.cc +++ b/paddle/operators/prelu_op.cc @@ -18,9 +18,9 @@ namespace paddle { namespace operators { -class PreluOp : public framework::OperatorWithKernel { +class PReluOp : public framework::OperatorWithKernel { public: - PreluOp(const std::string &type, const framework::VariableNameMap &inputs, + PReluOp(const std::string &type, const framework::VariableNameMap &inputs, const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorWithKernel(type, inputs, outputs, attrs) {} @@ -34,13 +34,13 @@ class PreluOp : public framework::OperatorWithKernel { }; // template -class PreluOpMaker : public framework::OpProtoAndCheckerMaker { +class PReluOpMaker : public framework::OpProtoAndCheckerMaker { public: - PreluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + PReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input tensor of prelu operator."); AddOutput("Out", "The output tensor of prelu operator."); - AddComment(R"DOC(Prelu operator + AddComment(R"DOC(PRelu operator The equation is: f(x) = alpha * x , for x < 0 @@ -52,7 +52,7 @@ f(x) = x , for x >= 0 }; // The operator to calculate gradients of a prelu operator. 
-class PreluGradOp : public framework::OperatorWithKernel { +class PReluGradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -72,9 +72,9 @@ class PreluGradOp : public framework::OperatorWithKernel { namespace ops = paddle::operators; -REGISTER_OP(prelu, ops::PreluOp, ops::PreluOpMaker, prelu_grad, - ops::PreluGradOp); +REGISTER_OP(prelu, ops::PReluOp, ops::PReluOpMaker, prelu_grad, + ops::PReluGradOp); REGISTER_OP_CPU_KERNEL(prelu, - ops::PreluKernel); + ops::PReluKernel); REGISTER_OP_CPU_KERNEL(prelu_grad, - ops::PreluGradKernel); + ops::PReluGradKernel); diff --git a/paddle/operators/prelu_op.h b/paddle/operators/prelu_op.h index a1e719e314..a7e34744ba 100644 --- a/paddle/operators/prelu_op.h +++ b/paddle/operators/prelu_op.h @@ -25,7 +25,7 @@ template ; template -class PreluKernel : public framework::OpKernel { +class PReluKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); @@ -45,7 +45,7 @@ class PreluKernel : public framework::OpKernel { }; template -class PreluGradKernel : public framework::OpKernel { +class PReluGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* dX = context.Output(framework::GradVarName("X")); From 1b2374ad3b2831229d7db5e8cf38c81706fd65ce Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 15 Sep 2017 22:30:21 -0700 Subject: [PATCH 05/13] new prelu with functor --- paddle/operators/prelu_op.cc | 15 ++-- paddle/operators/prelu_op.h | 69 ++++++++++++++----- .../v2/framework/tests/test_prelu_op.py | 2 +- 3 files changed, 62 insertions(+), 24 deletions(-) diff --git a/paddle/operators/prelu_op.cc b/paddle/operators/prelu_op.cc index eafd66579f..d15352110f 100644 --- a/paddle/operators/prelu_op.cc +++ b/paddle/operators/prelu_op.cc @@ -27,13 +27,14 @@ class PReluOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null"); auto *in = ctx.Input("X"); auto *out = ctx.Output("Out"); out->Resize(in->dims()); } }; -// template +template class PReluOpMaker : public framework::OpProtoAndCheckerMaker { public: PReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) @@ -43,10 +44,12 @@ class PReluOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC(PRelu operator The equation is: -f(x) = alpha * x , for x < 0 -f(x) = x , for x >= 0 + + f(x) = alpha * x , for x < 0 + f(x) = x , for x >= 0 + )DOC"); - AddAttr("alpha", "The scaling factor alpha of prelu.") + AddAttr("alpha", "The scaling factor alpha of prelu.") .SetDefault(0.0); } }; @@ -59,6 +62,8 @@ class PReluGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), + "Input(Out@GRAD) should not be null"); auto *X_grad = ctx.Output(framework::GradVarName("X")); auto *X = ctx.Input("X"); @@ -72,7 +77,7 @@ class PReluGradOp : public framework::OperatorWithKernel { namespace ops = paddle::operators; -REGISTER_OP(prelu, ops::PReluOp, ops::PReluOpMaker, prelu_grad, +REGISTER_OP(prelu, ops::PReluOp, ops::PReluOpMaker, prelu_grad, ops::PReluGradOp); REGISTER_OP_CPU_KERNEL(prelu, ops::PReluKernel); diff 
--git a/paddle/operators/prelu_op.h b/paddle/operators/prelu_op.h index a7e34744ba..a98d489839 100644 --- a/paddle/operators/prelu_op.h +++ b/paddle/operators/prelu_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" +#include "paddle/platform/transform.h" namespace paddle { namespace operators { @@ -23,28 +24,60 @@ using Tensor = framework::Tensor; template using EigenVector = framework::EigenVector; +using platform::Transform; -template +template +class Prelu_functor { + public: + explicit Prelu_functor(const T& alpha) : alpha_(alpha) {} + + HOSTDEVICE T operator()(const T& X) const { + if (X > 0) + return X; + else + return X * alpha_; + } + + private: + T alpha_; +}; + +template class PReluKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); auto* Out = context.Output("Out"); - Out->mutable_data(context.GetPlace()); + const T* X_ptr = X->data(); + T* O_ptr = Out->mutable_data(context.GetPlace()); - auto alpha = static_cast(context.Attr("alpha")); + auto alpha = static_cast(context.Attr("alpha")); - auto X_vec = EigenVector::Flatten(*X); - auto Out_vec = EigenVector::Flatten(*Out); + int numel = X->numel(); - // auto place = context.GetEigenDevice(); - // Out_vec.device(place) - Out_vec = X_vec.cwiseMax(0.f) + X_vec.cwiseMin(0.f) * alpha; + auto place = context.GetPlace(); + Transform(place, X_ptr, X_ptr + numel, O_ptr, Prelu_functor(alpha)); } }; -template +template +class Prelu_Grad_functor { + public: + explicit Prelu_Grad_functor(const T& alpha) : alpha_(alpha) {} + + HOSTDEVICE T operator()(const T& Out, const T& dOut) const { + if (Out > 0) + return dOut; + else + return dOut * alpha_; + } + + private: + T alpha_; +}; + +template class PReluGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { @@ -53,16 +86,16 @@ class PReluGradKernel : public framework::OpKernel { auto* Out = context.Input("Out"); - auto alpha = static_cast(context.Attr("alpha")); + auto alpha = static_cast(context.Attr("alpha")); + + T* dX_ptr = dX->mutable_data(context.GetPlace()); + const T* dO_ptr = dO->data(); + const T* O_ptr = Out->data(); + int numel = dX->numel(); - dX->mutable_data(context.GetPlace()); - for (int i = 0; i < dX->numel(); ++i) { - if (Out->data()[i] > 0) { - dX->data()[i] = dO->data()[i]; - } else { - dX->data()[i] = dO->data()[i] * alpha; - } - } + auto place = context.GetPlace(); + Transform(place, O_ptr, O_ptr + numel, dO_ptr, dX_ptr, + Prelu_Grad_functor(alpha)); } }; diff --git a/python/paddle/v2/framework/tests/test_prelu_op.py b/python/paddle/v2/framework/tests/test_prelu_op.py index 39b6f673fd..cbf2e6b2a8 100644 --- a/python/paddle/v2/framework/tests/test_prelu_op.py +++ b/python/paddle/v2/framework/tests/test_prelu_op.py @@ -6,7 +6,7 @@ from op_test import OpTest class PreluTest(OpTest): def setUp(self): self.op_type = "prelu" - self.inputs = {'X': np.random.normal(size=(3, 5)).astype("float32")} + self.inputs = {'X': np.random.normal(size=(10, 10)).astype("float32")} self.attrs = {'alpha': 0.1} out_np = np.maximum(self.inputs['X'], 0.) out_np = out_np + np.minimum(self.inputs['X'], 0.) 
* self.attrs['alpha'] From 490482aeb1f3150413b99c78fae8c6a920975649 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 15 Sep 2017 18:54:00 -0700 Subject: [PATCH 06/13] Do not invoke GPU method when use_gpu=false --- .../gserver/gradientmachines/RecurrentGradientMachine.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 9f29b97466..b71431b907 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include +#include #include "NeuralNetwork.h" #include "paddle/gserver/layers/AgentLayer.h" #include "paddle/utils/Flags.h" @@ -429,7 +430,11 @@ void RecurrentGradientMachine::reorganizeInput(PassType passType) { } { - AsyncGpuBlock asyncGpuBlock; + std::unique_ptr asyncBlock; + + if (useGpu_) { + asyncBlock.reset(new AsyncGpuBlock()); + } // inFrameLine select rows in real layer one time for (size_t i = 0; i < inFrameLines_.size(); i++) { From 86afb85907d74d8d5e6fbe5ca814decc03f4ab43 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Fri, 15 Sep 2017 22:49:43 -0700 Subject: [PATCH 07/13] prelu with gpu --- paddle/operators/prelu_op.cu | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 paddle/operators/prelu_op.cu diff --git a/paddle/operators/prelu_op.cu b/paddle/operators/prelu_op.cu new file mode 100644 index 0000000000..9e391dabae --- /dev/null +++ b/paddle/operators/prelu_op.cu @@ -0,0 +1,21 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/operators/prelu_op.h" + +REGISTER_OP_GPU_KERNEL( + prelu, paddle::operators::PReluKernel); +REGISTER_OP_GPU_KERNEL( + prelu_grad, + paddle::operators::PReluGradKernel); From c165d233222a1fb363b6c6846674742b38b401df Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Sat, 16 Sep 2017 15:55:15 -0700 Subject: [PATCH 08/13] prelu fix --- paddle/operators/prelu_op.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/paddle/operators/prelu_op.h b/paddle/operators/prelu_op.h index a98d489839..d3d8f76e5a 100644 --- a/paddle/operators/prelu_op.h +++ b/paddle/operators/prelu_op.h @@ -21,9 +21,6 @@ namespace paddle { namespace operators { using Tensor = framework::Tensor; -template -using EigenVector = framework::EigenVector; using platform::Transform; template From b6347fb6c0ead317f1f01767b8de72173cbbaa3a Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Mon, 18 Sep 2017 11:09:50 -0700 Subject: [PATCH 09/13] prelu fix --- paddle/operators/prelu_op.cc | 6 ++++-- paddle/operators/prelu_op.h | 12 ++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/paddle/operators/prelu_op.cc b/paddle/operators/prelu_op.cc index d15352110f..fd6269a469 100644 --- a/paddle/operators/prelu_op.cc +++ b/paddle/operators/prelu_op.cc @@ -29,6 +29,8 @@ class PReluOp : public framework::OperatorWithKernel { void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null"); auto *in = ctx.Input("X"); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) should not be null"); auto *out = ctx.Output("Out"); out->Resize(in->dims()); } @@ -41,6 +43,8 @@ class PReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input tensor of prelu operator."); AddOutput("Out", "The output tensor of prelu operator."); + AddAttr("alpha", "The scaling factor alpha of prelu.") + .SetDefault(0.0); AddComment(R"DOC(PRelu operator The equation is: @@ -49,8 +53,6 @@ The equation is: f(x) = x , for x >= 0 )DOC"); - AddAttr("alpha", "The scaling factor alpha of prelu.") - .SetDefault(0.0); } }; diff --git a/paddle/operators/prelu_op.h b/paddle/operators/prelu_op.h index d3d8f76e5a..31ae54d5bc 100644 --- a/paddle/operators/prelu_op.h +++ b/paddle/operators/prelu_op.h @@ -24,9 +24,9 @@ using Tensor = framework::Tensor; using platform::Transform; template -class Prelu_functor { +class PReluFunctor { public: - explicit Prelu_functor(const T& alpha) : alpha_(alpha) {} + explicit PReluFunctor(const T& alpha) : alpha_(alpha) {} HOSTDEVICE T operator()(const T& X) const { if (X > 0) @@ -54,14 +54,14 @@ class PReluKernel : public framework::OpKernel { int numel = X->numel(); auto place = context.GetPlace(); - Transform(place, X_ptr, X_ptr + numel, O_ptr, Prelu_functor(alpha)); + Transform(place, X_ptr, X_ptr + numel, O_ptr, PReluFunctor(alpha)); } }; template -class Prelu_Grad_functor { +class PReluGradFunctor { public: - explicit Prelu_Grad_functor(const T& alpha) : alpha_(alpha) {} + explicit PReluGradFunctor(const T& alpha) : alpha_(alpha) {} HOSTDEVICE T operator()(const T& Out, const T& dOut) const { if (Out > 0) @@ -92,7 +92,7 @@ class PReluGradKernel : public framework::OpKernel { auto place = context.GetPlace(); Transform(place, O_ptr, O_ptr + numel, dO_ptr, dX_ptr, - Prelu_Grad_functor(alpha)); + PReluGradFunctor(alpha)); } }; From 1b797468899097487c210b1ed761ae91beefcb11 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Mon, 18 Sep 2017 15:34:51 -0700 Subject: 
[PATCH 10/13] prelu --- paddle/operators/prelu_op.cc | 23 +++++++++----- paddle/operators/prelu_op.h | 58 +++++++++++++++++++----------------- 2 files changed, 45 insertions(+), 36 deletions(-) diff --git a/paddle/operators/prelu_op.cc b/paddle/operators/prelu_op.cc index fd6269a469..911df8ba67 100644 --- a/paddle/operators/prelu_op.cc +++ b/paddle/operators/prelu_op.cc @@ -29,6 +29,11 @@ class PReluOp : public framework::OperatorWithKernel { void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null"); auto *in = ctx.Input("X"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Alpha"), + "Input(Alpha) should not be null"); + auto *alpha = ctx.Input("Alpha"); + PADDLE_ENFORCE(alpha->numel() == 1, "Size of weight Alpha must be one."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), "Output(Out) should not be null"); auto *out = ctx.Output("Out"); @@ -36,15 +41,13 @@ class PReluOp : public framework::OperatorWithKernel { } }; -template class PReluOpMaker : public framework::OpProtoAndCheckerMaker { public: PReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input tensor of prelu operator."); + AddInput("Alpha", "The alpha weight of prelu operator."); AddOutput("Out", "The output tensor of prelu operator."); - AddAttr("alpha", "The scaling factor alpha of prelu.") - .SetDefault(0.0); AddComment(R"DOC(PRelu operator The equation is: @@ -66,11 +69,15 @@ class PReluGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null."); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), "Input(Out@GRAD) should not be null"); - auto *X_grad = - ctx.Output(framework::GradVarName("X")); - auto *X = ctx.Input("X"); + auto *dx = ctx.Output(framework::GradVarName("X")); + auto *x = ctx.Input("X"); + + auto *dalpha = + ctx.Output(framework::GradVarName("Alpha")); + auto *alpha = ctx.Input("Alpha"); - X_grad->Resize(X->dims()); + dx->Resize(x->dims()); + dalpha->Resize(alpha->dims()); } }; @@ -79,7 +86,7 @@ class PReluGradOp : public framework::OperatorWithKernel { namespace ops = paddle::operators; -REGISTER_OP(prelu, ops::PReluOp, ops::PReluOpMaker, prelu_grad, +REGISTER_OP(prelu, ops::PReluOp, ops::PReluOpMaker, prelu_grad, ops::PReluGradOp); REGISTER_OP_CPU_KERNEL(prelu, ops::PReluKernel); diff --git a/paddle/operators/prelu_op.h b/paddle/operators/prelu_op.h index 31ae54d5bc..f88ce94dc8 100644 --- a/paddle/operators/prelu_op.h +++ b/paddle/operators/prelu_op.h @@ -28,33 +28,35 @@ class PReluFunctor { public: explicit PReluFunctor(const T& alpha) : alpha_(alpha) {} - HOSTDEVICE T operator()(const T& X) const { - if (X > 0) - return X; + HOSTDEVICE T operator()(const T& x) const { + if (x > 0) + return x; else - return X * alpha_; + return x * alpha_; } private: T alpha_; }; -template +template class PReluKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* X = context.Input("X"); - auto* Out = context.Output("Out"); + auto* x = context.Input("X"); + auto* alpha = context.Input("Alpha"); + auto* out = context.Output("Out"); - const T* X_ptr = X->data(); - T* O_ptr = Out->mutable_data(context.GetPlace()); + const T* x_ptr = x->data(); + T* o_ptr = out->mutable_data(context.GetPlace()); - auto alpha = static_cast(context.Attr("alpha")); + auto alpha_val = alpha->data()[0]; + // auto alpha = 
static_cast(context.Attr("alpha")); - int numel = X->numel(); + int numel = x->numel(); auto place = context.GetPlace(); - Transform(place, X_ptr, X_ptr + numel, O_ptr, PReluFunctor(alpha)); + Transform(place, x_ptr, x_ptr + numel, o_ptr, PReluFunctor(alpha_val)); } }; @@ -63,36 +65,36 @@ class PReluGradFunctor { public: explicit PReluGradFunctor(const T& alpha) : alpha_(alpha) {} - HOSTDEVICE T operator()(const T& Out, const T& dOut) const { - if (Out > 0) - return dOut; + HOSTDEVICE T operator()(const T& out, const T& dout) const { + if (out > 0) + return dout; else - return dOut * alpha_; + return dout * alpha_; } private: T alpha_; }; -template +template class PReluGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* dX = context.Output(framework::GradVarName("X")); - auto* dO = context.Input(framework::GradVarName("Out")); + auto* dx = context.Output(framework::GradVarName("X")); + auto* dout = context.Input(framework::GradVarName("Out")); - auto* Out = context.Input("Out"); + auto* out = context.Input("Out"); + auto* alpha = context.Input("Alpha"); + auto alpha_val = alpha->data()[0]; - auto alpha = static_cast(context.Attr("alpha")); - - T* dX_ptr = dX->mutable_data(context.GetPlace()); - const T* dO_ptr = dO->data(); - const T* O_ptr = Out->data(); - int numel = dX->numel(); + T* dx_ptr = dx->mutable_data(context.GetPlace()); + const T* dout_ptr = dout->data(); + const T* out_ptr = out->data(); + int numel = dx->numel(); auto place = context.GetPlace(); - Transform(place, O_ptr, O_ptr + numel, dO_ptr, dX_ptr, - PReluGradFunctor(alpha)); + Transform(place, out_ptr, out_ptr + numel, dout_ptr, dx_ptr, + PReluGradFunctor(alpha_val)); } }; From 3c3a6d90ae961920284fc32abc8d7395fc8812cc Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Mon, 18 Sep 2017 16:36:41 -0700 Subject: [PATCH 11/13] prelu finalize --- paddle/operators/prelu_op.h | 23 ++++++++++--------- .../v2/framework/tests/test_prelu_op.py | 10 ++++---- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/paddle/operators/prelu_op.h b/paddle/operators/prelu_op.h index f88ce94dc8..ece2a836a6 100644 --- a/paddle/operators/prelu_op.h +++ b/paddle/operators/prelu_op.h @@ -26,17 +26,17 @@ using platform::Transform; template class PReluFunctor { public: - explicit PReluFunctor(const T& alpha) : alpha_(alpha) {} + explicit PReluFunctor(const T* alpha) : alpha_(alpha) {} HOSTDEVICE T operator()(const T& x) const { if (x > 0) return x; else - return x * alpha_; + return x * (*alpha_); } private: - T alpha_; + const T* alpha_; }; template @@ -50,30 +50,29 @@ class PReluKernel : public framework::OpKernel { const T* x_ptr = x->data(); T* o_ptr = out->mutable_data(context.GetPlace()); - auto alpha_val = alpha->data()[0]; - // auto alpha = static_cast(context.Attr("alpha")); + auto* alpha_ptr = alpha->data(); int numel = x->numel(); auto place = context.GetPlace(); - Transform(place, x_ptr, x_ptr + numel, o_ptr, PReluFunctor(alpha_val)); + Transform(place, x_ptr, x_ptr + numel, o_ptr, PReluFunctor(alpha_ptr)); } }; template class PReluGradFunctor { public: - explicit PReluGradFunctor(const T& alpha) : alpha_(alpha) {} + explicit PReluGradFunctor(const T* alpha) : alpha_(alpha) {} HOSTDEVICE T operator()(const T& out, const T& dout) const { if (out > 0) return dout; else - return dout * alpha_; + return dout * (*alpha_); } private: - T alpha_; + const T* alpha_; }; template @@ -85,7 +84,7 @@ class PReluGradKernel : public framework::OpKernel { auto* out = 
context.Input("Out"); auto* alpha = context.Input("Alpha"); - auto alpha_val = alpha->data()[0]; + auto* alpha_ptr = alpha->data(); T* dx_ptr = dx->mutable_data(context.GetPlace()); const T* dout_ptr = dout->data(); @@ -94,7 +93,9 @@ class PReluGradKernel : public framework::OpKernel { auto place = context.GetPlace(); Transform(place, out_ptr, out_ptr + numel, dout_ptr, dx_ptr, - PReluGradFunctor(alpha_val)); + PReluGradFunctor(alpha_ptr)); + + // TODO (Zhuoyuan): add dalpha upgrade when GPU kernels ready } }; diff --git a/python/paddle/v2/framework/tests/test_prelu_op.py b/python/paddle/v2/framework/tests/test_prelu_op.py index cbf2e6b2a8..b74812e969 100644 --- a/python/paddle/v2/framework/tests/test_prelu_op.py +++ b/python/paddle/v2/framework/tests/test_prelu_op.py @@ -3,13 +3,15 @@ import numpy as np from op_test import OpTest -class PreluTest(OpTest): +class PReluTest(OpTest): def setUp(self): self.op_type = "prelu" - self.inputs = {'X': np.random.normal(size=(10, 10)).astype("float32")} - self.attrs = {'alpha': 0.1} + x_np = np.random.normal(size=(10, 10)).astype("float32") + alpha_np = np.array([.1]) + self.inputs = {'X': x_np, 'Alpha': alpha_np} out_np = np.maximum(self.inputs['X'], 0.) - out_np = out_np + np.minimum(self.inputs['X'], 0.) * self.attrs['alpha'] + out_np = out_np + np.minimum(self.inputs['X'], + 0.) * self.inputs['Alpha'] assert out_np is not self.inputs['X'] self.outputs = {'Out': out_np} From 154d88c26188b25fa6eec99b1d7e743918fa43d9 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Mon, 18 Sep 2017 17:29:58 -0700 Subject: [PATCH 12/13] fix gradient not stable --- python/paddle/v2/framework/tests/test_prelu_op.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/paddle/v2/framework/tests/test_prelu_op.py b/python/paddle/v2/framework/tests/test_prelu_op.py index b74812e969..2b6b7db368 100644 --- a/python/paddle/v2/framework/tests/test_prelu_op.py +++ b/python/paddle/v2/framework/tests/test_prelu_op.py @@ -7,6 +7,8 @@ class PReluTest(OpTest): def setUp(self): self.op_type = "prelu" x_np = np.random.normal(size=(10, 10)).astype("float32") + x_np_sign = np.sign(x_np) + x_np = x_np_sign * np.maximum(x_np, .005) alpha_np = np.array([.1]) self.inputs = {'X': x_np, 'Alpha': alpha_np} out_np = np.maximum(self.inputs['X'], 0.) From 6d1446ee830b78e858d458b7ee183fa412ffe81d Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Mon, 18 Sep 2017 17:54:58 -0700 Subject: [PATCH 13/13] prelu --- paddle/operators/prelu_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/operators/prelu_op.cc b/paddle/operators/prelu_op.cc index 911df8ba67..7ae80b2968 100644 --- a/paddle/operators/prelu_op.cc +++ b/paddle/operators/prelu_op.cc @@ -46,8 +46,8 @@ class PReluOpMaker : public framework::OpProtoAndCheckerMaker { PReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input tensor of prelu operator."); - AddInput("Alpha", "The alpha weight of prelu operator."); - AddOutput("Out", "The output tensor of prelu operator."); + AddInput("Alpha", "The alpha weight of PRelu operator."); + AddOutput("Out", "The output tensor of PRelu operator."); AddComment(R"DOC(PRelu operator The equation is:
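Editor's sketch appended to this excerpt (not part of the patches above): by the end of the series the scaling factor is no longer an attribute; it is a one-element "Alpha" input tensor read through a pointer inside PReluFunctor / PReluGradFunctor, and patch 12 ("fix gradient not stable") keeps |x| away from zero so the numeric gradient check does not straddle the kink at x = 0. A minimal NumPy reference of what the updated test computes is given below; the function name and the abs-based clamp are illustrative rather than copied verbatim from the test.

import numpy as np

def prelu_reference(x, alpha):
    # out = x where x >= 0, alpha * x elsewhere; alpha is a 1-element array
    return np.maximum(x, 0.) + np.minimum(x, 0.) * alpha

x = np.random.normal(size=(10, 10)).astype("float32")
x = np.sign(x) * np.maximum(np.abs(x), 0.005)  # keep |x| >= 0.005 so the numeric gradient is stable
alpha = np.array([0.1]).astype("float32")
out = prelu_reference(x, alpha)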