elu gelu relu logsigmoid, test=develop (#26304)

* logsigmoid and LogSigmoid, test=develop

* add elu gelu relu, test=develop

* update to_variable to to_tensor, test=develop

* address review comments, test=develop

* address review comments, test=develop

* change to_variable to to_tensor in test, test=develop
Qi Li committed 5 years ago via GitHub
parent 0a461ac3d5
commit 61800f4a1e
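Taken together, the diffs below add native 2.0-style elu, gelu, relu, and logsigmoid functionals plus matching Layer classes. As a quick orientation, here is a minimal usage sketch, assuming a PaddlePaddle 2.0 dygraph environment with numpy available; it simply mirrors the docstring examples further down.

import numpy as np
import paddle
import paddle.nn.functional as F

paddle.disable_static()                      # dygraph mode, as in the docstring examples
x = paddle.to_tensor(np.array([-1.0, 0.0, 1.0]).astype('float32'))

print(F.elu(x, alpha=1.0))                   # alpha * (e^x - 1) for negative inputs, x otherwise
print(F.gelu(x))                             # exact (erf-based) GELU
print(F.gelu(x, approximate=True))           # tanh approximation of GELU
print(F.relu(x))                             # [0., 0., 1.]
print(F.logsigmoid(x))                       # log(1 / (1 + e^-x)), always negative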

@@ -645,6 +645,7 @@ __all__ += ['gelu']
_gelu_ = generate_layer_fn('gelu')
@deprecated(since="2.0.0", update_to="paddle.nn.functional.gelu")
def gelu(x, approximate=False):
locals_var = locals().copy()
kwargs = dict()
@@ -655,10 +656,6 @@ def gelu(x, approximate=False):
gelu.__doc__ = """
:alias_main: paddle.nn.functional.gelu
:alias: paddle.nn.functional.gelu,paddle.nn.functional.activation.gelu
:old_api: paddle.fluid.layers.gelu
:strong:`GeLU Activation Operator`
For more details, see [Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415).
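The hunk above only tags the old fluid layer with a @deprecated marker; the replacement it points to is paddle.nn.functional.gelu. A hedged migration sketch (the deprecated call is kept only as a comment):

import numpy as np
import paddle
import paddle.nn.functional as F

paddle.disable_static()
x = paddle.to_tensor(np.array([0.5, -0.5]).astype('float32'))

# old: out = paddle.fluid.layers.gelu(x, approximate=False)   # deprecated since 2.0.0
out = F.gelu(x, approximate=False)           # target named in @deprecated(update_to=...)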

File diff suppressed because it is too large.

@@ -51,11 +51,14 @@ from .decode import beam_search_decode #DEFINE_ALIAS
from .decode import gather_tree #DEFINE_ALIAS
from .input import data #DEFINE_ALIAS
# from .input import Input #DEFINE_ALIAS
from .layer.activation import ELU
from .layer.activation import GELU
from .layer.activation import Hardshrink
# from .layer.activation import PReLU #DEFINE_ALIAS
from .layer.activation import ReLU #DEFINE_ALIAS
from .layer.activation import ReLU
from .layer.activation import LeakyReLU #DEFINE_ALIAS
from .layer.activation import Sigmoid #DEFINE_ALIAS
from .layer.activation import LogSigmoid
# from .layer.activation import Softmax #DEFINE_ALIAS
from .layer.activation import LogSoftmax #DEFINE_ALIAS
from .layer.activation import HSigmoid #DEFINE_ALIAS
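The newly exported Layer classes (ELU, GELU, ReLU, LogSigmoid) can be used directly as modules. A small sketch, again assuming a 2.0 dygraph environment, with values following the docstrings later in this diff:

import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.array([-2.0, 0.0, 1.0]).astype('float32'))

# each layer is a thin wrapper over the corresponding functional API
for layer in (paddle.nn.ELU(0.2), paddle.nn.GELU(), paddle.nn.ReLU(), paddle.nn.LogSigmoid()):
    print(type(layer).__name__, layer(x))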

@@ -14,13 +14,10 @@
# TODO: define activation functions of neural network
from ...fluid.layers import brelu #DEFINE_ALIAS
from ...fluid.layers import elu #DEFINE_ALIAS
from ...fluid.layers import erf #DEFINE_ALIAS
from ...fluid.layers import gelu #DEFINE_ALIAS
from ...fluid.layers import hard_sigmoid #DEFINE_ALIAS
from ...fluid.layers import hard_swish #DEFINE_ALIAS
from ...fluid.layers import leaky_relu #DEFINE_ALIAS
from ...fluid.layers import logsigmoid #DEFINE_ALIAS
from ...fluid.layers import maxout #DEFINE_ALIAS
from ...fluid.layers import relu6 #DEFINE_ALIAS
from ...fluid.layers import selu #DEFINE_ALIAS
@@ -69,6 +66,108 @@ from ...fluid.data_feeder import check_variable_and_dtype, check_dtype
import paddle
def elu(x, alpha=1.0, name=None):
"""
elu activation.
.. math::
elu(x) = max(0, x) + min(0, \\alpha * (e^{x}-1))
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
alpha (float, optional): The 'alpha' value of the ELU formulation. Default is 1.0.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Returns:
A Tensor with the same data type and shape as ``x`` .
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
x = paddle.to_tensor(np.array([[-1,6],[1,15.6]]))
out = F.elu(x, alpha=0.2)
# [[-0.12642411 6. ]
# [ 1. 15.6 ]]
"""
if in_dygraph_mode():
return core.ops.elu(x, 'alpha', alpha)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'elu')
helper = LayerHelper("elu", **locals())
out = helper.create_variable_for_type_inference(x.dtype)
helper.append_op(
type='elu',
inputs={'X': x},
outputs={'Out': out},
attrs={'alpha': alpha})
return out
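As a sanity check on the formula documented above, a numpy-only reference (no paddle required) reproduces the docstring example for alpha=0.2:

import numpy as np

def elu_ref(x, alpha=1.0):
    # elu(x) = max(0, x) + min(0, alpha * (e^x - 1))
    x = np.asarray(x, dtype=np.float64)
    return np.maximum(0.0, x) + np.minimum(0.0, alpha * (np.exp(x) - 1.0))

print(elu_ref([[-1, 6], [1, 15.6]], alpha=0.2))
# ~ [[-0.12642411   6. ]
#    [ 1.          15.6]]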
def gelu(x, approximate=False, name=None):
"""
gelu activation.
if approximate is True
.. math::
gelu(x) = 0.5 * x * (1 + tanh(\\sqrt{\\frac{2}{\\pi}} * (x + 0.044715x^{3})))
else
.. math::
gelu(x) = 0.5 * x * (1 + erf(\\frac{x}{\\sqrt{2}}))
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
approximate (bool, optional): Whether to enable approximation. Default is False.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Returns:
A Tensor with the same data type and shape as ``x`` .
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
data = np.random.randn(2, 3).astype("float32")
x = paddle.to_tensor(data)
out = F.gelu(x)
data
# array([[ 0.87165993, -1.0541513 , -0.37214822],
# [ 0.15647964, 0.32496083, 0.33045998]], dtype=float32)
out
# array([[ 0.70456535, -0.15380788, -0.13207214],
# [ 0.08796856, 0.20387867, 0.2080159 ]], dtype=float32)
"""
if in_dygraph_mode():
return core.ops.gelu(x, 'approximate', approximate)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'gelu')
helper = LayerHelper("gelu", **locals())
out = helper.create_variable_for_type_inference(x.dtype)
helper.append_op(
type='gelu',
inputs={'X': x},
outputs={'Out': out},
attrs={'approximate': approximate})
return out
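The docstring gives two forms, the exact erf-based one and the tanh approximation. A standard-library check shows how closely they agree for moderate inputs:

import math

def gelu_exact(x):
    # 0.5 * x * (1 + erf(x / sqrt(2)))
    return 0.5 * x * (1.0 + math.erf(x / math.sqrt(2.0)))

def gelu_tanh(x):
    # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
    return 0.5 * x * (1.0 + math.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))

for v in (-1.0, -0.1, 0.0, 0.5, 2.0):
    print(v, gelu_exact(v), gelu_tanh(v))    # the two columns agree to roughly 1e-4 for these inputs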
def hardshrink(x, threshold=0.5, name=None):
"""
hard shrinkage activation
@@ -245,11 +344,8 @@ def hsigmoid(input,
return out
def relu(input, inplace=False, name=None):
def relu(x, name=None):
"""
:alias_main: paddle.nn.functional.relu
:alias: paddle.nn.functional.relu,paddle.nn.functional.activation.relu
ReLU Activation.
.. math:
@@ -257,44 +353,74 @@ def relu(input, inplace=False, name=None):
out = max(x, 0)
Parameters:
input (Variable): The input variable. A multi-dimension Tensor with type float16, float32, or float64.
inplace (bool, optional): If inplace is True, the input and output of ``ReLU`` are the same variable.
Otherwise, the input and output of ``ReLU`` are different variables. Default: False. Note that if x is
more than one OPs' input, inplace must be False.
name (str, optional): The default value is None. Normally there is no need for user to set this property.
For more information, please refer to :ref:`api_guide_Name` .
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Returns:
Output of relu operator, a Tensor with shape same as input
A Tensor with the same data type and shape as ``x`` .
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle.nn.functional as functional
import numpy as np
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
data = np.array([-2, 0, 1]).astype('float32')
with fluid.dygraph.guard():
data = fluid.dygraph.to_variable(data)
res = functional.relu(data) # [0, 0, 1]
x = paddle.to_tensor(np.array([-2, 0, 1]).astype('float32'))
out = F.relu(x) # [0., 0., 1.]
"""
if in_dygraph_mode():
if inplace:
warnings.warn(
"Inplace on ReLU is not allowed and will be discarded in dygraph mode currently."
)
return core.ops.relu(input)
check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'],
'relu')
return core.ops.relu(x)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'relu')
helper = LayerHelper('relu', **locals())
outs = input if inplace else helper.create_variable_for_type_inference(
input.dtype)
helper.append_op(type='relu', inputs={'X': [input]}, outputs={'Out': outs})
return outs
out = helper.create_variable_for_type_inference(x.dtype)
helper.append_op(type='relu', inputs={'X': x}, outputs={'Out': out})
return out
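The functional signature changes here from relu(input, inplace=False) to relu(x, name=None); the inplace flag, which was already only a warning in dygraph, is gone. A hedged migration sketch:

import numpy as np
import paddle
import paddle.nn.functional as F

paddle.disable_static()
x = paddle.to_tensor(np.array([-2, 0, 1]).astype('float32'))

# old: out = F.relu(x, inplace=True)         # 'inplace' no longer exists and only warned in dygraph
out = F.relu(x)                              # [0., 0., 1.]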
def logsigmoid(x, name=None):
"""
logsigmoid activation.
.. math::
logsigmoid(x) = \log \frac{1}{1 + e^{-x}}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Returns:
A Tensor with the same data type and shape as ``x`` .
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
x = paddle.to_tensor(np.array([1.0, 2.0, 3.0, 4.0]))
out = F.logsigmoid(x) # [-0.31326169, -0.12692801, -0.04858735, -0.01814993]
"""
if in_dygraph_mode():
return core.ops.logsigmoid(x)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
'logsigmoid')
helper = LayerHelper("logsigmoid", **locals())
out = helper.create_variable_for_type_inference(x.dtype)
helper.append_op(type='logsigmoid', inputs={'X': x}, outputs={'Out': out})
return out
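A numpy check of the formula, using the same input as the example above; note the results are negative, since sigmoid(x) < 1:

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
print(-np.log1p(np.exp(-x)))                 # == log(1 / (1 + e^-x))
# ~ [-0.31326169 -0.12692801 -0.04858735 -0.01814993]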
def softmax(x, axis=-1, name=None):

@@ -15,12 +15,15 @@
# TODO: define activation functions of neural network
__all__ = [
'ELU',
'GELU',
'Hardshrink',
# 'PReLU',
'ReLU',
'LeakyReLU',
'Sigmoid',
# 'Softmax',
'LogSigmoid',
'LogSoftmax',
'HSigmoid'
]
@@ -31,6 +34,103 @@ from ...fluid.framework import in_dygraph_mode
from .. import functional as F
class ELU(layers.Layer):
"""
ELU Activation.
.. math::
ELU(x) = max(0, x) + min(0, \\alpha * (e^{x}-1))
Parameters:
alpha (float, optional): The 'alpha' value of the ELU formulation. Default is 1.0.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Shape:
- input: Tensor with any shape.
- output: Tensor with the same shape as input.
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
x = paddle.to_tensor(np.array([[-1,6],[1,15.6]]))
m = paddle.nn.ELU(0.2)
out = m(x)
# [[-0.12642411 6. ]
# [ 1. 15.6 ]]
"""
def __init__(self, alpha=1.0, name=None):
super(ELU, self).__init__()
self._alpha = alpha
self._name = name
def forward(self, x):
return F.elu(x, self._alpha, self._name)
class GELU(layers.Layer):
"""
GELU Activation.
If approximate is True
.. math::
GELU(x) = 0.5 * x * (1 + tanh(\\sqrt{\\frac{2}{\\pi}} * (x + 0.044715x^{3})))
else
.. math::
GELU(x) = 0.5 * x * (1 + erf(\\frac{x}{\\sqrt{2}}))
Parameters:
approximate (bool, optional): Whether to enable approximation. Default is False.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Shape:
- input: Tensor with any shape.
- output: Tensor with the same shape as input.
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
data = np.random.randn(2, 3).astype("float32")
x = paddle.to_tensor(data)
m = paddle.nn.GELU()
out = m(x)
data
# array([[ 0.87165993, -1.0541513 , -0.37214822],
# [ 0.15647964, 0.32496083, 0.33045998]], dtype=float32)
out
# array([[ 0.70456535, -0.15380788, -0.13207214],
# [ 0.08796856, 0.20387867, 0.2080159 ]], dtype=float32)
"""
def __init__(self, approximate=False, name=None):
super(GELU, self).__init__()
self._approximate = approximate
self._name = name
def forward(self, x):
return F.gelu(x, self._approximate, self._name)
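Since GELU.forward simply delegates to F.gelu, the module and the functional form should produce identical results. A small equivalence sketch (assumes Tensor.numpy() is available in this dygraph build):

import numpy as np
import paddle
import paddle.nn.functional as F

paddle.disable_static()
x = paddle.to_tensor(np.random.randn(2, 3).astype('float32'))
m = paddle.nn.GELU(approximate=True)
print(np.allclose(m(x).numpy(), F.gelu(x, approximate=True).numpy()))   # True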
class Hardshrink(layers.Layer):
"""
Hardshrink Activation
@@ -216,44 +316,39 @@ class HSigmoid(layers.Layer):
class ReLU(layers.Layer):
"""
:alias_main: paddle.nn.ReLU
:alias: paddle.nn.ReLU,paddle.nn.layer.ReLU,paddle.nn.layer.activation.ReLU
ReLU Activation.
.. math:
out = max(x, 0)
ReLU(x) = max(x, 0)
Parameters:
inplace (bool, optional): If inplace is True, the input and output of
``ReLU`` are the same variable. Otherwise, the input and output of
``ReLU`` are different variables. Default False. Note that if x is
more than one OPs' input, inplace must be False.
Returns:
None
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Shape:
- input: Tensor with any shape.
- output: Tensor with the same shape as input.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle.nn as nn
import numpy as np
import paddle
import numpy as np
data = np.array([-2, 0, 1]).astype('float32')
my_relu = nn.ReLU()
with fluid.dygraph.guard():
data = fluid.dygraph.to_variable(data)
res = my_relu(data) # [0, 0, 1]
paddle.disable_static()
x = paddle.to_tensor(np.array([-2, 0, 1]).astype('float32'))
m = paddle.nn.ReLU()
out = m(x) # [0., 0., 1.]
"""
def __init__(self, inplace=False):
def __init__(self, name=None):
super(ReLU, self).__init__()
self._inplace = inplace
self._name = name
def forward(self, input):
return F.relu(input, self._inplace)
def forward(self, x):
return F.relu(x, self._name)
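The ReLU module drops its inplace argument in favour of an optional name, matching the functional change earlier in this diff. A hedged migration sketch:

import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.array([-2, 0, 1]).astype('float32'))

# old: m = paddle.nn.ReLU(inplace=True)      # 'inplace' is no longer accepted
m = paddle.nn.ReLU()
out = m(x)                                   # [0., 0., 1.]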
class LeakyReLU(layers.Layer):
@@ -336,6 +431,44 @@ class Sigmoid(layers.Layer):
return F.sigmoid(x, self.name)
class LogSigmoid(layers.Layer):
"""
LogSigmoid Activation.
.. math::
LogSigmoid(x) = \log \frac{1}{1 + e^{-x}}
Parameters:
x (Tensor): The input Tensor with data type float32, float64.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
Shape:
- input: Tensor with any shape.
- output: Tensor with the same shape as input.
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
x = paddle.to_tensor(np.array([1.0, 2.0, 3.0, 4.0]))
m = paddle.nn.LogSigmoid()
out = m(x) # [-0.31326169, -0.12692801, -0.04858735, -0.01814993]
"""
def __init__(self, name=None):
super(LogSigmoid, self).__init__()
self._name = name
def forward(self, x):
return F.logsigmoid(x, self._name)
class LogSoftmax(layers.Layer):
"""
This operator implements the log_softmax layer. The calculation process is as follows:
