Add selu (#14415)
* add selu
* use for range test=develop
* add API test=develop
* follow comment test=develop
* update API.spec test=develop
parent 9d29ebc010
commit 82773477ae
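The commit message mentions that a Python API (and an API.spec update) accompanies the operator, but those files are not part of the hunks below. Purely as an illustrative usage sketch, and assuming the wrapper follows the usual fluid.layers naming and is exposed as fluid.layers.selu (an assumption, not confirmed by this diff), calling the new op from Python might look like this:

import numpy as np
import paddle.fluid as fluid

# Assumed wrapper name: fluid.layers.selu (the Python-side change is not shown in this diff).
x = fluid.layers.data(name='x', shape=[10], dtype='float32')
y = fluid.layers.selu(x)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

out, = exe.run(fluid.default_main_program(),
               feed={'x': np.random.rand(4, 10).astype('float32')},
               fetch_list=[y])
print(out.shape)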
@@ -0,0 +1,135 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/selu_op.h"
#include <string>

namespace paddle {
namespace operators {

class SeluOp : public framework::OperatorWithKernel {
 public:
  SeluOp(const std::string &type, const framework::VariableNameMap &inputs,
         const framework::VariableNameMap &outputs,
         const framework::AttributeMap &attrs)
      : OperatorWithKernel(type, inputs, outputs, attrs) {}

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SeluOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of SeluOp should not be null.");

    ctx->ShareDim("X", /*->*/ "Out");
    ctx->ShareLoD("X", /*->*/ "Out");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(
        framework::GetDataTypeOfVar(ctx.InputVar("X")), ctx.GetPlace());
  }
};

class SeluOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput {
 protected:
  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
      const override {
    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Out"}};
  }
};

class SeluOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "The input tensor of selu operator.");
    AddOutput("Out", "The output tensor of selu operator.");
    AddAttr<float>("scale",
                   "(float) the default value is 1.0507~. For more "
                   "information about this value, please refer to: "
                   "https://arxiv.org/abs/1706.02515.")
        .SetDefault(1.0507009873554804934193349852946);
    AddAttr<float>("alpha",
                   "(float) the default value is 1.6732~. For more "
                   "information about this value, please refer to: "
                   "https://arxiv.org/abs/1706.02515.")
        .SetDefault(1.6732632423543772848170429916717);
    AddComment(R"DOC(
Selu Operator.

The equation is:
$$
f(x) = \lambda *
\begin{cases}
 \quad \quad x, \quad \quad \quad \text{if} \ x > 0 \\
 \alpha * e^x - \alpha, \qquad \text{if} \ x <= 0
\end{cases}
$$

The input `X` can optionally carry LoD (Level of Details) information;
the output shares the LoD information with the input `X`.
)DOC");
  }
};

class SeluGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto *grad_op = new framework::OpDesc();
    grad_op->SetType("selu_grad");
    grad_op->SetInput("Out", Output("Out"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(this->Attrs());
    return std::unique_ptr<framework::OpDesc>(grad_op);
  }
};

class SeluGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null");
    PADDLE_ENFORCE(ctx->HasInput("Out"), "Input(Out) should not be null");
    auto x_grad_name = framework::GradVarName("X");
    ctx->SetOutputDim(x_grad_name, ctx->GetInputDim("Out"));
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(
        framework::GetDataTypeOfVar(ctx.InputVar("Out")), ctx.GetPlace());
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OPERATOR(selu, ops::SeluOp, ops::SeluOpMaker, ops::SeluOpInferVarType,
                  ops::SeluGradMaker);
REGISTER_OPERATOR(selu_grad, ops::SeluGradOp);
REGISTER_OP_CPU_KERNEL(
    selu, ops::SeluKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SeluKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    selu_grad, ops::SeluGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SeluGradKernel<paddle::platform::CPUDeviceContext, double>);
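The DOC string above defines the SELU forward pass: f(x) = scale * x for x > 0 and scale * (alpha * e^x - alpha) otherwise, with the defaults registered in SeluOpMaker. As an informal NumPy cross-check of that equation (illustrative only, not part of the commit; the helper name selu_reference is made up):

import numpy as np

# Defaults mirrored from SeluOpMaker; these are the self-normalizing
# constants from https://arxiv.org/abs/1706.02515.
SCALE = 1.0507009873554804934193349852946
ALPHA = 1.6732632423543772848170429916717

def selu_reference(x, scale=SCALE, alpha=ALPHA):
    # f(x) = scale * x                 if x > 0
    # f(x) = scale * (alpha*e^x - alpha) if x <= 0
    return scale * np.where(x > 0, x, alpha * np.exp(x) - alpha)

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print(selu_reference(x))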
@@ -0,0 +1,22 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/selu_op.h"

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
    selu, ops::SeluKernel<paddle::platform::CUDADeviceContext, float>,
    ops::SeluKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
    selu_grad, ops::SeluGradKernel<paddle::platform::CUDADeviceContext, float>,
    ops::SeluGradKernel<paddle::platform::CUDADeviceContext, double>);
@@ -0,0 +1,124 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <string>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/for_range.h"
namespace paddle {
namespace operators {

static HOSTDEVICE float real_exp(float x) { return expf(x); }
static HOSTDEVICE double real_exp(double x) { return exp(x); }

template <typename T>
struct SeluFunctor {
  SeluFunctor(const T* x_data_ptr, float alpha, float scale, T* y_data_ptr)
      : x_data_ptr_(x_data_ptr),
        alpha_(alpha),
        scale_(scale),
        y_data_ptr_(y_data_ptr) {}

  HOSTDEVICE void operator()(size_t idx) const {
    T x_ele = x_data_ptr_[idx];
    if (x_ele <= 0) {
      x_ele = alpha_ * real_exp(x_ele) - alpha_;
    }
    y_data_ptr_[idx] = scale_ * x_ele;
  }
  const T* x_data_ptr_;
  const float alpha_;
  const float scale_;
  T* y_data_ptr_;
};

template <typename T>
struct SeluGradFunctor {
  SeluGradFunctor(const T* y_data_ptr, const T* dy_data_ptr, float alpha,
                  float scale, T* dx_data_ptr)
      : y_data_ptr_(y_data_ptr),
        dy_data_ptr_(dy_data_ptr),
        alpha_(alpha),
        scale_(scale),
        la_(alpha * scale),
        dx_data_ptr_(dx_data_ptr) {}

  HOSTDEVICE void operator()(size_t idx) const {
    T y_ele = y_data_ptr_[idx];
    T dy_ele = dy_data_ptr_[idx];

    float tmp = scale_;
    if (y_ele <= 0) {
      tmp = y_ele + la_;
    }
    dx_data_ptr_[idx] = dy_ele * tmp;
  }
  const T* y_data_ptr_;
  const T* dy_data_ptr_;
  const float alpha_;
  const float scale_;
  const float la_;
  T* dx_data_ptr_;
};

template <typename DeviceContext, typename T>
class SeluKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    using Tensor = framework::Tensor;

    auto* x = context.Input<Tensor>("X");
    auto* out = context.Output<Tensor>("Out");

    float alpha = context.Attr<float>("alpha");
    float scale = context.Attr<float>("scale");

    auto out_ptr = out->mutable_data<T>(context.GetPlace());

    SeluFunctor<T> functor(x->data<T>(), alpha, scale, out_ptr);

    auto& dev_ctx = context.template device_context<DeviceContext>();
    size_t limit = static_cast<size_t>(x->numel());
    platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
    for_range(functor);
  }
};

template <typename DeviceContext, typename T>
class SeluGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    using Tensor = framework::Tensor;

    auto* out = context.Input<Tensor>("Out");
    auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx = context.Output<Tensor>(framework::GradVarName("X"));

    float alpha = context.Attr<float>("alpha");
    float scale = context.Attr<float>("scale");

    auto dx_ptr = dx->mutable_data<T>(context.GetPlace());

    SeluGradFunctor<T> functor(out->data<T>(), dout->data<T>(), alpha, scale,
                               dx_ptr);

    auto& dev_ctx = context.template device_context<DeviceContext>();
    size_t limit = static_cast<size_t>(out->numel());
    platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
    for_range(functor);
  }
};

}  // namespace operators
}  // namespace paddle
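SeluGradFunctor computes the gradient from the saved output y rather than the input x: for y > 0 the slope is simply scale, and for y <= 0 the forward pass gives y = scale * alpha * (e^x - 1), so scale * alpha * e^x = y + scale * alpha, which is exactly the `y_ele + la_` branch above (la_ = alpha * scale). A small NumPy sketch (illustrative, not committed code) checking that identity against a finite difference:

import numpy as np

scale = 1.0507009873554804934193349852946
alpha = 1.6732632423543772848170429916717

def selu(x):
    return scale * np.where(x > 0, x, alpha * np.exp(x) - alpha)

def selu_grad_from_output(y):
    # dy/dx = scale              for x > 0 (i.e. y > 0)
    # dy/dx = scale*alpha*e^x    for x <= 0, and scale*alpha*e^x == y + scale*alpha
    return np.where(y > 0, scale, y + scale * alpha)

x = np.array([-1.5, -0.3, 0.4, 2.0])
eps = 1e-6
numeric = (selu(x + eps) - selu(x - eps)) / (2 * eps)
analytic = selu_grad_from_output(selu(x))
print(np.max(np.abs(numeric - analytic)))  # expected to be close to 0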
@@ -0,0 +1,71 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
import six
from op_test import OpTest

class SeluTest(OpTest):
    def setUp(self):
        self.op_type = "selu"
        self.x_shape = [3, 5, 5, 10]
        self.dtype = np.float32
        self.init_x_shape()
        self.init_dtype()

        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946

        x = np.random.normal(size=self.x_shape).astype(self.dtype)

        # Since the zero point of selu is not differentiable, avoid
        # sampling values close to zero.
        x[np.abs(x) < 0.005] = 0.02

        x_flat = x.flatten()

        for i in range(x_flat.size):
            if x_flat[i] < 0:
                x_flat[i] = alpha * np.exp(x_flat[i]) - alpha
            x_flat[i] = scale * x_flat[i]

        out_np = x_flat.reshape(self.x_shape)

        self.inputs = {'X': x}
        self.outputs = {'Out': out_np}

        self.attrs = {
            'alpha': alpha,
            'scale': scale,
        }

    def init_x_shape(self):
        pass

    def init_dtype(self):
        pass

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['X'], 'Out')


if __name__ == "__main__":
    unittest.main()
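The expected output in SeluTest.setUp is built with an explicit per-element loop over x_flat. For reference, the same result can be expressed as a single vectorized NumPy statement (an illustrative rewrite, not part of the committed test):

import numpy as np

alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946

x = np.random.normal(size=[3, 5, 5, 10]).astype(np.float32)
x[np.abs(x) < 0.005] = 0.02  # keep values away from the non-differentiable point

# Equivalent to the per-element loop in SeluTest.setUp.
out_np = (scale * np.where(x < 0, alpha * np.exp(x) - alpha, x)).astype(x.dtype)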