Merge pull request #4121 from zchen0211/develop
PRelu with forward, backward, and Python test passed
commit f86c1ccdbe
@@ -0,0 +1,94 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/prelu_op.h"
#include "paddle/operators/net_op.h"

namespace paddle {
namespace operators {
class PReluOp : public framework::OperatorWithKernel {
 public:
  PReluOp(const std::string &type, const framework::VariableNameMap &inputs,
          const framework::VariableNameMap &outputs,
          const framework::AttributeMap &attrs)
      : OperatorWithKernel(type, inputs, outputs, attrs) {}

 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
    auto *in = ctx.Input<framework::Tensor>("X");
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Alpha"),
                            "Input(Alpha) should not be null");
    auto *alpha = ctx.Input<framework::Tensor>("Alpha");
    PADDLE_ENFORCE(alpha->numel() == 1, "Size of weight Alpha must be one.");

    PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"),
                            "Output(Out) should not be null");
    auto *out = ctx.Output<framework::LoDTensor>("Out");
    out->Resize(in->dims());
  }
};
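Note that this first version constrains Alpha to a single shared weight (alpha->numel() == 1): per-channel alpha, as used in some PRelu variants, is not supported here, and Out simply takes the shape of X.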
class PReluOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  PReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The input tensor of the PRelu operator.");
    AddInput("Alpha", "The alpha weight of the PRelu operator.");
    AddOutput("Out", "The output tensor of the PRelu operator.");
    AddComment(R"DOC(PRelu operator

The equation is:

  f(x) = alpha * x , for x < 0
  f(x) = x         , for x >= 0

)DOC");
  }
};
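For reference, the documented equation is a single elementwise select. A minimal NumPy sketch of the forward pass (illustrative only, not part of this PR):

    import numpy as np

    def prelu_forward(x, alpha):
        # f(x) = x for x >= 0, alpha * x otherwise
        return np.where(x >= 0.0, x, alpha * x)

    x = np.array([-2.0, -0.5, 0.0, 1.5])
    print(prelu_forward(x, 0.1))  # [-0.2  -0.05  0.    1.5 ]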
// The operator to calculate gradients of a PRelu operator.
class PReluGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                            "Input(Out@GRAD) should not be null");
    auto *dx = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
    auto *x = ctx.Input<framework::Tensor>("X");

    auto *dalpha =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("Alpha"));
    auto *alpha = ctx.Input<framework::Tensor>("Alpha");

    dx->Resize(x->dims());
    dalpha->Resize(alpha->dims());
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP(prelu, ops::PReluOp, ops::PReluOpMaker, prelu_grad,
            ops::PReluGradOp);
REGISTER_OP_CPU_KERNEL(prelu,
                       ops::PReluKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(prelu_grad,
                       ops::PReluGradKernel<paddle::platform::CPUPlace, float>);
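The backward pass mirrors the forward select: the upstream gradient flows through unchanged where the activation was positive and is scaled by alpha otherwise. A hedged NumPy sketch of both gradients (the Alpha gradient is shown for completeness; the kernel in prelu_op.h currently defers it, see the TODO there):

    import numpy as np

    def prelu_backward(x, dout, alpha):
        dx = np.where(x > 0.0, dout, alpha * dout)         # gradient w.r.t. X
        dalpha = np.sum(np.where(x > 0.0, 0.0, x) * dout)  # gradient w.r.t. scalar Alpha
        return dx, dalpha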
@@ -0,0 +1,21 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/prelu_op.h"

REGISTER_OP_GPU_KERNEL(
    prelu, paddle::operators::PReluKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
    prelu_grad,
    paddle::operators::PReluGradKernel<paddle::platform::GPUPlace, float>);
@@ -0,0 +1,103 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/platform/transform.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using platform::Transform;
template <typename T>
class PReluFunctor {
 public:
  explicit PReluFunctor(const T* alpha) : alpha_(alpha) {}

  HOSTDEVICE T operator()(const T& x) const {
    if (x > 0)
      return x;
    else
      return x * (*alpha_);
  }

 private:
  const T* alpha_;
};
template <typename Place, typename T>
class PReluKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<Tensor>("X");
    auto* alpha = context.Input<Tensor>("Alpha");
    auto* out = context.Output<Tensor>("Out");

    const T* x_ptr = x->data<T>();
    T* o_ptr = out->mutable_data<T>(context.GetPlace());

    auto* alpha_ptr = alpha->data<T>();

    int numel = x->numel();

    auto place = context.GetPlace();
    Transform(place, x_ptr, x_ptr + numel, o_ptr, PReluFunctor<T>(alpha_ptr));
  }
};
template <typename T>
class PReluGradFunctor {
 public:
  explicit PReluGradFunctor(const T* alpha) : alpha_(alpha) {}

  HOSTDEVICE T operator()(const T& out, const T& dout) const {
    if (out > 0)
      return dout;
    else
      return dout * (*alpha_);
  }

 private:
  const T* alpha_;
};
template <typename Place, typename T>
class PReluGradKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
    auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));

    auto* out = context.Input<Tensor>("Out");
    auto* alpha = context.Input<Tensor>("Alpha");
    auto* alpha_ptr = alpha->data<T>();

    T* dx_ptr = dx->mutable_data<T>(context.GetPlace());
    const T* dout_ptr = dout->data<T>();
    const T* out_ptr = out->data<T>();
    int numel = dx->numel();

    auto place = context.GetPlace();
    Transform(place, out_ptr, out_ptr + numel, dout_ptr, dx_ptr,
              PReluGradFunctor<T>(alpha_ptr));

    // TODO(Zhuoyuan): add dalpha update when GPU kernels are ready
  }
};

}  // namespace operators
}  // namespace paddle
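Transform serves as a place-aware counterpart of std::transform: the unary overload in PReluKernel maps PReluFunctor over X into Out, and the binary overload in PReluGradKernel zips Out and Out@GRAD through PReluGradFunctor into X@GRAD. A rough Python analogue of the two call shapes (illustrative; not the Paddle API):

    def transform_unary(xs, functor):
        return [functor(x) for x in xs]

    def transform_binary(xs, ys, functor):
        return [functor(x, y) for x, y in zip(xs, ys)]

    alpha = 0.1
    out = transform_unary([-1.0, 2.0], lambda x: x if x > 0 else alpha * x)
    dx = transform_binary(out, [1.0, 1.0], lambda o, d: d if o > 0 else alpha * d)
    print(out, dx)  # [-0.1, 2.0] [0.1, 1.0]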
@@ -0,0 +1,28 @@
import unittest
import numpy as np
from op_test import OpTest


class PReluTest(OpTest):
    def setUp(self):
        self.op_type = "prelu"
        x_np = np.random.normal(size=(10, 10)).astype("float32")
        # Push |x| away from zero: PRelu is not differentiable at 0, which
        # would make the numeric gradient check unstable.
        x_np_sign = np.sign(x_np)
        x_np = x_np_sign * np.maximum(np.abs(x_np), .005)
        alpha_np = np.array([.1])
        self.inputs = {'X': x_np, 'Alpha': alpha_np}
        out_np = np.maximum(self.inputs['X'], 0.)
        out_np = out_np + np.minimum(self.inputs['X'],
                                     0.) * self.inputs['Alpha']
        assert out_np is not self.inputs['X']
        self.outputs = {'Out': out_np}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['X'], 'Out')


if __name__ == "__main__":
    unittest.main()
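Note that the gradient check only covers X: the backward kernel defers the Alpha gradient (see the TODO in PReluGradKernel), so check_grad(['X'], 'Out') matches what the kernel actually computes.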