Add ReLU6, Tanhshrink, SELU, Softplus, Softshrink, and Softsign for API 2.0 (#26376)

hong19860320 5 years ago committed by GitHub
parent 6e13e86ab3
commit 40d193ed17
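For orientation, here is a minimal sketch of how the functional APIs touched by this change might be exercised in 2.0 dygraph mode. It follows the pattern of the sample code updated below; the default arguments it relies on are assumptions, not something this diff pins down.

import numpy as np
import paddle
import paddle.nn.functional as F

paddle.disable_static()
x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))

# New/renamed functional forms, called with their (assumed) defaults.
print(F.relu6(x).numpy())       # min(max(0, x), 6)
print(F.tanhshrink(x).numpy())  # x - tanh(x)
print(F.selu(x).numpy())        # scaled exponential linear unit
print(F.softplus(x).numpy())    # log(1 + exp(beta*x)) / beta, beta=1 by default
print(F.softshrink(x).numpy())  # soft thresholding around 0
print(F.softsign(x).numpy())    # x / (1 + |x|)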

@@ -317,13 +317,6 @@ $$out = x^2$$
)DOC";
UNUSED constexpr char SoftplusDoc[] = R"DOC(
Softplus Activation Operator.
$$out = \ln(1 + e^{x})$$
)DOC";
UNUSED constexpr char SoftsignDoc[] = R"DOC(
Softsign Activation Operator.
@@ -396,6 +389,36 @@ $$out = \max(x, \alpha * x)$$
}
};
class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"Input of Softplus operator, an N-D Tensor, with data type "
"float32, float64 or float16.");
AddOutput(
"Out",
"Output of Softplus operator, a Tensor with shape same as input.");
AddAttr<float>("beta", "The value of beta for Softplus.").SetDefault(1.0f);
AddAttr<float>("threshold", "The value of threshold for Softplus.")
.SetDefault(20.0f);
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel.")
.SetDefault(false);
AddAttr<bool>(
"use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn.")
.SetDefault(false);
AddComment(R"DOC(
:strong:`Softplus Activation Operator`
.. math::
out = \frac{1}{\beta} * \log(1 + \exp(\beta * x)) \\
\text{For numerical stability, the implementation reverts to the linear function when } \beta * x > threshold.
)DOC");
}
};
class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
@@ -672,7 +695,6 @@ REGISTER_ACTIVATION_OP_MAKER(Reciprocal, ReciprocalDoc);
REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc);
REGISTER_ACTIVATION_OP_MAKER(Log1p, Log1pDoc);
REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc);
REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc);
REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc);
template <ActBwdOpFwdDeps kDepValue>

@@ -975,32 +975,46 @@ struct HardSwishGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};
// softplus(x) = log(1 + exp(x))
// When x is a very large positive number, exp(x) may explode to inf,
// Using trick below for numerical stability
// https://hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
// Then: softplus(x) = max(x, 0) + log(exp(-max(x, 0)) + exp(x - max(x, 0)))
// For numerical stability, the following formula is used instead of
// softplus(x) = log(1 + exp(x)):
// softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <= threshold
// (beta = 1, threshold = 20 by default), otherwise x
template <typename T>
struct SoftplusFunctor : public BaseActivationFunctor<T> {
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) {
auto temp = x.cwiseMax(static_cast<T>(0)); // temp = max(x, 0)
out.device(d) = temp + (((-temp).exp() + (x - temp).exp()).log());
auto x_beta = static_cast<T>(beta) * x;
out.device(d) = (x_beta > static_cast<T>(threshold))
.select(x, (static_cast<T>(1) + x_beta.exp()).log() /
static_cast<T>(beta));
}
};
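As a cross-check of the formula in the new functor, a NumPy reference sketch of the same piecewise computation (a sketch only, not Paddle code; beta and threshold mirror the defaults stated in the comment above):

import numpy as np

def softplus_ref(x, beta=1.0, threshold=20.0):
    # softplus(x) = log(1 + exp(beta*x)) / beta below the threshold, x above it
    x_beta = beta * x
    # np.where evaluates both branches, so clamp the exp argument to keep it finite
    stable = np.log1p(np.exp(np.minimum(x_beta, threshold))) / beta
    return np.where(x_beta > threshold, x, stable)

print(softplus_ref(np.array([-0.4, 0.1, 30.0])))  # last entry stays ~30 instead of overflowing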
// d(softplus(x))/dx = exp(x) / (1 + exp(x))
// For numerical stability:
// d(softplus(x))/dx = exp(x - max(x, 0)) / (exp(-max(x, 0)) +
// exp(x - max(x, 0)))
// For numerical stability, the following formula is used instead of
// d(softplus(x))/dx = 1 / (1 + exp(-x)):
// d(softplus(x))/dx = 1 / (1 + exp(-beta * x)) when beta * x <= threshold
// (beta = 1, threshold = 20 by default), otherwise 1
template <typename T>
struct SoftplusGradFunctor : public BaseActivationFunctor<T> {
float beta;
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}, {"threshold", &threshold}};
}
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) {
auto temp = x.cwiseMax(static_cast<T>(0)); // temp = max(x, 0)
auto x_beta = static_cast<T>(beta) * x;
dx.device(d) =
dout * ((x - temp).exp() / ((-temp).exp() + (x - temp).exp()));
(x_beta > static_cast<T>(threshold))
.select(dout, dout / (static_cast<T>(1) + (-x_beta).exp()));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
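And a matching NumPy sketch of the gradient the new SoftplusGradFunctor computes; the pass-through branch above the threshold corresponds to a derivative of 1 (again a reference sketch, not Paddle code):

import numpy as np

def softplus_grad_ref(x, dout, beta=1.0, threshold=20.0):
    # dL/dx = dout * sigmoid(beta*x) below the threshold, dout (derivative 1) above it
    x_beta = beta * x
    return np.where(x_beta > threshold, dout, dout / (1.0 + np.exp(-x_beta)))

x = np.array([-0.4, 0.1, 30.0])
print(softplus_grad_ref(x, np.ones_like(x)))  # ~[0.401, 0.525, 1.0]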

@@ -8643,11 +8643,9 @@ def relu(x, name=None):
return out
@deprecated(since="2.0.0", update_to="paddle.nn.functional.selu")
def selu(x, scale=None, alpha=None, name=None):
"""
:alias_main: paddle.nn.functional.selu
:alias: paddle.nn.functional.selu,paddle.nn.functional.activation.selu
:old_api: paddle.fluid.layers.selu
Selu Operator.
@@ -9304,12 +9302,9 @@ def elu(x, alpha=1.0, name=None):
return out
@templatedoc()
@deprecated(since="2.0.0", update_to="paddle.nn.functional.relu6")
def relu6(x, threshold=6.0, name=None):
"""
:alias_main: paddle.nn.functional.relu6
:alias: paddle.nn.functional.relu6,paddle.nn.functional.activation.relu6
:old_api: paddle.fluid.layers.relu6
${comment}

@@ -20,6 +20,8 @@ from ..framework import convert_np_dtype_to_dtype_, Variable
from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype
from paddle.utils import deprecated
__deprecated_func_name__ = {'tanh_shrink': 'tanhshrink', }
__activations_noattr__ = [
'sigmoid',
'logsigmoid',
@@ -64,14 +66,20 @@ __all__ += __activations_noattr__
__all__ += __unary_func__
for _OP in set(__activations_noattr__):
_new_OP = _OP
if _OP in __deprecated_func_name__:
_new_OP = __deprecated_func_name__[_OP]
func = generate_activation_fn(_OP)
func = deprecated(
since="2.0.0", update_to="paddle.nn.functional.%s" % (_OP))(func)
since="2.0.0", update_to="paddle.nn.functional.%s" % (_new_OP))(func)
globals()[_OP] = func
for _OP in set(__unary_func__):
_new_OP = _OP
if _OP in __deprecated_func_name__:
_new_OP = __deprecated_func_name__[_OP]
func = generate_activation_fn(_OP)
func = deprecated(since="2.0.0", update_to="paddle.%s" % (_OP))(func)
func = deprecated(since="2.0.0", update_to="paddle.%s" % (_new_OP))(func)
globals()[_OP] = func
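The __deprecated_func_name__ table above only changes the update_to target shown in the deprecation message, so calling tanh_shrink points users at tanhshrink. A self-contained sketch of the same pattern, with an illustrative warning text and a dummy implementation rather than Paddle's real decorator:

import warnings

_deprecated_func_name = {'tanh_shrink': 'tanhshrink'}  # old name -> new name

def _deprecate(old_name, func):
    new_name = _deprecated_func_name.get(old_name, old_name)

    def wrapper(*args, **kwargs):
        warnings.warn("'%s' is deprecated since 2.0.0; use 'paddle.nn.functional.%s' instead."
                      % (old_name, new_name), DeprecationWarning)
        return func(*args, **kwargs)

    return wrapper

tanh_shrink = _deprecate('tanh_shrink', lambda x: x)  # dummy implementation for the sketch
tanh_shrink(1.0)  # warns, pointing at the renamed API: tanhshrink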
add_sample_code(globals()["sigmoid"], r"""
@@ -160,16 +168,14 @@ add_sample_code(globals()["tanh_shrink"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.to_variable(x_data)
out = F.tanh_shrink(x)
print(out.numpy())
# [-0.02005104 -0.00262468 0.00033201 0.00868739]
x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
out = F.tanhshrink(x) # [-0.020051, -0.00262468, 0.000332005, 0.00868739]
""")
@@ -401,16 +407,14 @@ add_sample_code(globals()["softplus"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.to_variable(x_data)
out = F.softplus(x)
print(out.numpy())
# [0.51301525 0.59813887 0.74439666 0.85435524]
x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355]
""")
@@ -418,16 +422,14 @@ add_sample_code(globals()["softsign"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.to_variable(x_data)
out = F.softsign(x)
print(out.numpy())
# [-0.28571429 -0.16666667 0.09090909 0.23076923]
x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
out = F.softsign(x) # [-0.285714, -0.166667, 0.0909091, 0.230769]
""")

File diff suppressed because it is too large.

@@ -17,9 +17,26 @@ from __future__ import print_function
import unittest
import numpy as np
import six
import paddle.fluid.core as core
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.fluid import compiler, Program, program_guard
def ref_selu(x,
scale=1.0507009873554804934193349852946,
alpha=1.6732632423543772848170429916717):
out = np.copy(x)
out_flat = out.flatten()
for i in range(out_flat.size):
if out_flat[i] < 0:
out_flat[i] = alpha * np.exp(out_flat[i]) - alpha
out_flat[i] = scale * out_flat[i]
out = out_flat.reshape(x.shape)
return out
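For readability, an equivalent vectorized form of the reference above (not used by the tests; it just makes the SELU formula explicit):

import numpy as np

def ref_selu_vectorized(x,
                        scale=1.0507009873554804934193349852946,
                        alpha=1.6732632423543772848170429916717):
    # scale * x for x >= 0, scale * alpha * (exp(x) - 1) for x < 0
    return scale * np.where(x >= 0, x, alpha * (np.exp(x) - 1.0))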
class SeluTest(OpTest):
@@ -39,17 +56,10 @@ class SeluTest(OpTest):
# zero.
x[np.abs(x) < 0.005] = 0.02
x_flat = x.flatten()
for i in range(x_flat.size):
if x_flat[i] < 0:
x_flat[i] = alpha * np.exp(x_flat[i]) - alpha
x_flat[i] = scale * x_flat[i]
out_np = x_flat.reshape(self.x_shape)
out = ref_selu(x, scale, alpha)
self.inputs = {'X': x}
self.outputs = {'Out': out_np}
self.outputs = {'Out': out}
self.attrs = {
'alpha': alpha,
@@ -69,17 +79,60 @@ class SeluTest(OpTest):
self.check_grad(['X'], 'Out')
class TestSeluOpError(unittest.TestCase):
class TestSeluAPI(unittest.TestCase):
# test paddle.nn.SELU, paddle.nn.functional.selu
def setUp(self):
self.scale = 1.5
self.alpha = 2.0
self.x_np = np.random.normal(size=[3, 5, 5, 10]).astype(np.float64)
# Since zero point in selu is not differentiable, avoid randomize
# zero.
self.x_np[np.abs(self.x_np) < 0.005] = 0.02
self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \
else paddle.CPUPlace()
def test_static_api(self):
with paddle.static.program_guard(paddle.static.Program()):
x = paddle.data('X', self.x_np.shape, self.x_np.dtype)
out1 = F.selu(x, self.scale, self.alpha)
selu = paddle.nn.SELU(self.scale, self.alpha)
out2 = selu(x)
exe = paddle.static.Executor(self.place)
res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2])
out_ref = ref_selu(self.x_np, self.scale, self.alpha)
for r in res:
self.assertEqual(np.allclose(out_ref, r), True)
def test_dygraph_api(self):
paddle.disable_static(self.place)
x = paddle.to_tensor(self.x_np)
out1 = F.selu(x, self.scale, self.alpha)
selu = paddle.nn.SELU(self.scale, self.alpha)
out2 = selu(x)
out_ref = ref_selu(self.x_np, self.scale, self.alpha)
for r in [out1, out2]:
self.assertEqual(np.allclose(out_ref, r.numpy()), True)
paddle.enable_static()
def test_fluid_api(self):
with fluid.program_guard(fluid.Program()):
x = fluid.data('X', self.x_np.shape, self.x_np.dtype)
out = fluid.layers.selu(x, self.scale, self.alpha)
exe = fluid.Executor(self.place)
res = exe.run(feed={'X': self.x_np}, fetch_list=[out])
out_ref = ref_selu(self.x_np, self.scale, self.alpha)
self.assertEqual(np.allclose(out_ref, res[0]), True)
def test_errors(self):
with program_guard(Program()):
with paddle.static.program_guard(paddle.static.Program()):
# The input type must be Variable.
self.assertRaises(TypeError, fluid.layers.selu, 1)
self.assertRaises(TypeError, F.selu, 1)
# The input dtype must be float16, float32, float64.
x_int32 = fluid.data(name='x_int32', shape=[12, 10], dtype='int32')
self.assertRaises(TypeError, fluid.layers.selu, x_int32)
# support the input dtype is float32
x_fp32 = fluid.data(name='x_fp32', shape=[12, 10], dtype='float32')
fluid.layers.selu(x_fp32)
x_int32 = paddle.data(name='x_int32', shape=[12, 10], dtype='int32')
self.assertRaises(TypeError, F.selu, x_int32)
# support the input dtype is float16
x_fp16 = paddle.data(name='x_fp16', shape=[12, 10], dtype='float16')
F.selu(x_fp16)
if __name__ == "__main__":

@@ -57,10 +57,16 @@ from .layer.activation import GELU
from .layer.activation import Hardshrink
# from .layer.activation import PReLU #DEFINE_ALIAS
from .layer.activation import ReLU
from .layer.activation import ReLU6 #DEFINE_ALIAS
from .layer.activation import SELU #DEFINE_ALIAS
from .layer.activation import LeakyReLU #DEFINE_ALIAS
from .layer.activation import Sigmoid #DEFINE_ALIAS
from .layer.activation import LogSigmoid
# from .layer.activation import Softmax #DEFINE_ALIAS
from .layer.activation import Softplus #DEFINE_ALIAS
from .layer.activation import Softshrink #DEFINE_ALIAS
from .layer.activation import Softsign #DEFINE_ALIAS
from .layer.activation import Tanhshrink #DEFINE_ALIAS
from .layer.activation import LogSoftmax #DEFINE_ALIAS
from .layer.activation import HSigmoid #DEFINE_ALIAS
from .layer.common import BilinearTensorProduct #DEFINE_ALIAS
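A minimal sketch of the Layer-style usage these new imports enable (input values are illustrative, and the no-argument constructors rely on assumed defaults):

import numpy as np
import paddle
import paddle.nn as nn

paddle.disable_static()
x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))

for layer in [nn.ReLU6(), nn.SELU(), nn.Softplus(),
              nn.Softshrink(), nn.Softsign(), nn.Tanhshrink()]:
    print(type(layer).__name__, layer(x).numpy())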

@@ -47,7 +47,7 @@ from .activation import softplus #DEFINE_ALIAS
from .activation import softshrink #DEFINE_ALIAS
from .activation import softsign #DEFINE_ALIAS
from .activation import swish #DEFINE_ALIAS
from .activation import tanh_shrink #DEFINE_ALIAS
from .activation import tanhshrink #DEFINE_ALIAS
from .activation import thresholded_relu #DEFINE_ALIAS
from .activation import log_softmax #DEFINE_ALIAS
from .common import dropout #DEFINE_ALIAS

File diff suppressed because it is too large.

File diff suppressed because it is too large.