# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO: define activation functions of neural network
from ...fluid.layers import brelu  #DEFINE_ALIAS
from ...fluid.layers import elu  #DEFINE_ALIAS
from ...fluid.layers import erf  #DEFINE_ALIAS
from ...fluid.layers import gelu  #DEFINE_ALIAS
from ...fluid.layers import hard_shrink  #DEFINE_ALIAS
from ...fluid.layers import hard_sigmoid  #DEFINE_ALIAS
from ...fluid.layers import hard_swish  #DEFINE_ALIAS
from ...fluid.layers import leaky_relu  #DEFINE_ALIAS
from ...fluid.layers import logsigmoid  #DEFINE_ALIAS
from ...fluid.layers import maxout  #DEFINE_ALIAS
from ...fluid.layers import relu6  #DEFINE_ALIAS
from ...fluid.layers import selu  #DEFINE_ALIAS
from ...fluid.layers import soft_relu  #DEFINE_ALIAS
from ...fluid.layers import softmax  #DEFINE_ALIAS
from ...fluid.layers import softplus  #DEFINE_ALIAS
from ...fluid.layers import softshrink  #DEFINE_ALIAS
from ...fluid.layers import softsign  #DEFINE_ALIAS
from ...fluid.layers import swish  #DEFINE_ALIAS
from ...fluid.layers import tanh_shrink  #DEFINE_ALIAS
from ...fluid.layers import thresholded_relu  #DEFINE_ALIAS

__all__ = [
    'brelu',
    'elu',
    'erf',
    'gelu',
    'hard_shrink',
    'hard_sigmoid',
    'hard_swish',
    'hsigmoid',
    'leaky_relu',
    'logsigmoid',
    'maxout',
    # 'prelu',
    'relu',
    'relu6',
    'selu',
    'sigmoid',
    'soft_relu',
    'softmax',
    'softplus',
    'softshrink',
    'softsign',
    'swish',
    'tanh_shrink',
    'thresholded_relu',
    'log_softmax'
]

import warnings

from ...fluid.layer_helper import LayerHelper
from ...fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_
from ...fluid import core
from ...fluid.data_feeder import check_variable_and_dtype


def hsigmoid(input,
             label,
             weight,
             bias,
             num_classes,
             path_table=None,
             path_code=None,
             is_sparse=False):
    """
    :alias_main: paddle.nn.functional.hsigmoid
    :alias: paddle.nn.functional.hsigmoid,paddle.nn.functional.activation.hsigmoid

    The hierarchical sigmoid organizes the classes into a complete binary tree to reduce the
    computational complexity and speed up model training, especially the training of language
    models. Each leaf node of the complete binary tree represents a class (word), and each
    non-leaf node acts as a binary classifier. For each class (word) there is a unique path from
    the root to the class itself; hsigmoid calculates the cost of each non-leaf node on that path
    and sums them to get the total cost. Compared with softmax, this OP reduces the computational
    complexity from :math:`O(N)` to :math:`O(log N)`, where :math:`N` is the number of classes or
    the size of the word dict.

    The OP supports a default tree and a custom tree. For the default tree, refer to
    `Hierarchical Probabilistic Neural Network Language Model
    <http://www.iro.umontreal.ca/~lisa/pointeurs/hierarchical-nnlm-aistats05.pdf>`_. For a custom
    tree, pass both :attr:`path_table` and :attr:`path_code` and do the following steps (taking a
    language model as an example); a sketch of steps 2 and 3 follows the list:

    1. Build a binary tree from a custom word dict; each leaf node should be a word in the dict.
    2. Create a dict mapping word_id -> path from the word to the root node; we call it
       path_table.
    3. Create a dict mapping word_id -> code of the path from the word to the root node; we call
       it path_code. The code is the label of each binary classifier: 1 indicates true, 0
       indicates false.
    4. Now each word has its own path and code along that path, so you can pass a batch of paths
       and codes that corresponds to the same batch of inputs.
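
    For illustration, a minimal sketch of steps 2 and 3 for a hypothetical 4-word vocabulary
    (the tree layout and node ids below are assumptions made for this example, not the only
    valid encoding):

    .. code-block:: python

        # Illustrative only: a complete binary tree over words w0..w3 with
        # three non-leaf nodes, indexed 0 (root), 1 and 2. Words w0 and w1
        # hang under node 1; words w2 and w3 hang under node 2.
        # path_table[i]: non-leaf node indexes on the path root -> word i
        # path_code[i]:  binary label at each of those nodes (branch taken)
        path_table = {0: [0, 1], 1: [0, 1], 2: [0, 2], 3: [0, 2]}
        path_code = {0: [0, 0], 1: [0, 1], 2: [1, 0], 3: [1, 1]}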

    Parameters:
        input (Variable): A tensor with the shape [N, D], where N is the size of the mini-batch
            and D is the feature size. Its data type supports float32 and float64.
        label (Variable): A tensor containing the labels of the training data. Its shape is
            [N, 1] and its data type is int64.
        weight (Variable): A tensor with shape (num_classes - 1, D) if not using a custom tree
            (path_code and path_table are None), or (num_classes, D) if using a custom tree.
        bias (Variable): A tensor with shape (num_classes - 1, 1) if not using a custom tree
            (path_code and path_table are None), or (num_classes, 1) if using a custom tree.
        num_classes (int): The number of classes or the size of the word dict, must be greater
            than 2. If the default tree is used, :attr:`num_classes` should not be None. If a
            custom tree is used, :attr:`num_classes` should be the number of non-leaf nodes,
            which is the number of classes handled by the binary classifiers.
        path_table (Variable, optional): A tensor that stores each batch of samples' path from
            leaf to root node. Its shape is [N, L] and its data type is int64, where L is the
            length of the path. For each sample i, path_table[i] is an array-like structure
            whose elements are indexes into the parent nodes' weight matrix. Default: None.
        path_code (Variable, optional): A tensor that stores each batch of samples' code of the
            path from leaf to root node. Its shape is [N, L] and its data type is int64, the
            same as :attr:`path_table`. Each path code consists of the codes of the nodes from
            leaf to root. Default: None.
        is_sparse (bool, optional): Whether to use sparse updating instead of dense updating.
            If True, the gradients of W and input will be sparse. Default: False.

    Returns:
        Variable: A tensor with the cost of the hierarchical sigmoid. Its shape is [N, 1] and
        its data type is the same as :attr:`input`.

    Examples:

        .. code-block:: python

            from paddle import fluid, nn
            import paddle.fluid.dygraph as dg
            import paddle.nn.functional as F
            import numpy as np

            main = fluid.Program()
            start = fluid.Program()
            feature_size = 6
            num_classes = 8
            with fluid.unique_name.guard():
                with fluid.program_guard(main, start):
                    x = fluid.data("input", [-1, feature_size],
                                   dtype="float32")
                    label = fluid.data("labels", [-1, 1], dtype="int64")
                    w = fluid.data("weight", (num_classes - 1, feature_size),
                                   dtype="float32")
                    b = fluid.data("bias", (num_classes - 1,), dtype="float32")
                    y = F.hsigmoid(x, label, w, b, num_classes)

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(start)
            feed_dict = {
                "input": np.random.randn(4, feature_size).astype(np.float32),
                "labels": np.random.randint(0, num_classes, (4, 1)).astype(np.int64),
                "weight": np.random.randn(num_classes - 1, feature_size).astype(np.float32),
                "bias": np.random.randn(num_classes - 1).astype(np.float32),
            }
            y_np, = exe.run(main, feed=feed_dict, fetch_list=[y])
            print(y_np.shape)

            # (4, 1)
    """

    attrs = {
        "num_classes": num_classes,
        "is_sparse": is_sparse,
        "remote_prefetch": is_sparse
    }

    inputs = {
        "X": input,
        "W": weight,
        "Bias": bias,
        "PathTable": path_table,
        "PathCode": path_code,
        "Label": label
    }

    helper = LayerHelper('hierarchical_sigmoid', **locals())
    dtype = helper.input_dtype()

    # Out holds the [N, 1] cost described in the docstring; PreOut is an
    # intermediate output of the op; W_Out is wired to the same variable
    # as the weight input.
    out = helper.create_variable_for_type_inference(dtype)
    pre_out = helper.create_variable_for_type_inference(dtype)
    outputs = {"Out": out, "PreOut": pre_out, "W_Out": weight}

    helper.append_op(
        type="hierarchical_sigmoid",
        inputs=inputs,
        outputs=outputs,
        attrs=attrs)
    return out


def relu(input, inplace=False, name=None):
    """
    :alias_main: paddle.nn.functional.relu
    :alias: paddle.nn.functional.relu,paddle.nn.functional.activation.relu

    ReLU activation.

    .. math::

        out = max(x, 0)

    Parameters:
        input (Variable): The input variable, a multi-dimensional Tensor with type float16,
            float32, or float64.
        inplace (bool, optional): If True, the input and output of ``ReLU`` are the same
            variable; otherwise, they are different variables. Note that if the input is
            consumed by more than one OP, inplace must be False. Default: False.
        name (str, optional): The default value is None. Normally there is no need for the user
            to set this property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Output of the relu operator, a Tensor with the same shape as the input.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import paddle.nn.functional as functional
            import numpy as np

            data = np.array([-2, 0, 1]).astype('float32')
            with fluid.dygraph.guard():
                data = fluid.dygraph.to_variable(data)
                res = functional.relu(data)  # [0, 0, 1]
    """
    if in_dygraph_mode():
        if inplace:
            warnings.warn(
                "The inplace option is not supported for relu in dygraph "
                "mode and will be ignored.")
        return core.ops.relu(input)

    check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'],
                             'relu')

    helper = LayerHelper('relu', **locals())
    # With inplace=True, reuse the input variable as the op output.
    outs = input if inplace else helper.create_variable_for_type_inference(
        input.dtype)
    helper.append_op(type='relu', inputs={'X': [input]}, outputs={'Out': outs})
    return outs


def sigmoid(input, inplace=False, name=None):
    r"""
    :alias_main: paddle.nn.functional.sigmoid
    :alias: paddle.nn.functional.sigmoid,paddle.nn.functional.activation.sigmoid

    Sigmoid activation.

    .. math::

        output = \frac{1}{1 + e^{-input}}

    Parameters:
        input (Variable): The input variable, a multi-dimensional Tensor with type float16,
            float32, or float64.
        inplace (bool, optional): If True, the input and output are the same variable;
            otherwise, they are different variables. Note that if the input is consumed by
            more than one OP, inplace must be False. Default: False.
        name (str, optional): The default value is None. Normally there is no need for the user
            to set this property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Output of the sigmoid operator, a Tensor with the same shape as the input.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import paddle.nn.functional as functional
            import numpy as np

            # In static graph mode
            input = fluid.data(name="input", shape=[None, 4])
            output = functional.sigmoid(input)
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            input_data = np.array([1.0, 2.0, 3.0, 4.0]).astype('float32')
            output_data = exe.run(feed={"input": input_data},
                                  fetch_list=[output])
            print(output_data)  # [0.7310586, 0.880797, 0.95257413, 0.98201376]

            # In dynamic graph mode
            with fluid.dygraph.guard():
                input = fluid.dygraph.to_variable(input_data)
                output = functional.sigmoid(input)
                print(output)  # [0.7310586, 0.880797, 0.95257413, 0.98201376]
    """

    if in_dygraph_mode():
        if inplace:
            warnings.warn(
                "The inplace option is not supported for sigmoid in dygraph "
                "mode and will be ignored.")
        return core.ops.sigmoid(input)

    check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'],
                             'sigmoid')
    helper = LayerHelper("sigmoid", **locals())
    outputs = helper.create_variable_for_type_inference(input.dtype)
    helper.append_op(
        type='sigmoid', inputs={'X': [input]}, outputs={'Out': outputs})
    return outputs


def log_softmax(input, axis=None, dtype=None, name=None):
    r"""
    :alias_main: paddle.nn.functional.log_softmax
    :alias: paddle.nn.functional.log_softmax,paddle.nn.functional.activation.log_softmax

    This operator implements the log_softmax layer. The calculation process is as follows:

    .. math::

        Out[i, j] = \log(softmax(x))
                  = \log\left(\frac{\exp(X[i, j])}{\sum_j \exp(X[i, j])}\right)

    Parameters:
        input (Variable): The input variable, a multi-dimensional Tensor with type float32 or
            float64.
        axis (int, optional): The index of the dimension along which to perform the softmax
            calculation. It should be in the range :math:`[-1, rank-1]`, where :math:`rank` is
            the rank of the input variable. Both None and -1 mean the last dimension.
            Default: None.
        dtype (np.dtype|core.VarDesc.VarType|str, optional): The desired data type of the
            returned tensor. If specified, the input tensor is cast to dtype before the
            operation is performed, which is useful for preventing data type overflows.
            Supported dtypes: float32, float64. Default: None.
        name (str, optional): The default value is None. Normally there is no need for the user
            to set this property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Variable: A ``Tensor`` holding the output of log_softmax. Its shape is the same as
        ``input``, and its data type is the same as ``input`` unless :attr:`dtype` is specified.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import paddle.nn.functional as F
            import numpy as np

            data = np.array([[[-2.0, 3.0, -4.0, 5.0],
                              [3.0, -4.0, 5.0, -6.0],
                              [-7.0, -8.0, 8.0, 9.0]],
                             [[1.0, -2.0, -3.0, 4.0],
                              [-5.0, 6.0, 7.0, -8.0],
                              [6.0, 7.0, 8.0, 9.0]]]).astype('float32')
            with fluid.dygraph.guard():
                data = fluid.dygraph.to_variable(data)
                res = F.log_softmax(data, -1)
                # [[[ -7.1278396   -2.1278396   -9.127839    -0.12783948]
                #   [ -2.1270514   -9.127051    -0.12705144 -11.127051  ]
                #   [-16.313261   -17.313261    -1.3132617   -0.31326184]]
                #  [[ -3.0518122   -6.051812    -7.051812    -0.051812  ]
                #   [-12.313267    -1.3132664   -0.3132665  -15.313267  ]
                #   [ -3.4401896   -2.4401896   -1.4401896   -0.44018966]]]
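
                # Illustrative note: passing dtype casts the input before the
                # softmax is computed, so e.g. the following (assumed usage)
                # would return a float64 result:
                # res64 = F.log_softmax(data, -1, dtype='float64')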
    """

    axis = -1 if axis is None else axis
    dtype = convert_np_dtype_to_dtype_(dtype) if dtype is not None else dtype

    if in_dygraph_mode():
        # Eager path: cast (if requested), then softmax, then log.
        outs_cast = input if dtype is None \
            else core.ops.cast(input, 'in_dtype', input.dtype, 'out_dtype', dtype)
        outs_softmax = core.ops.softmax(outs_cast, 'axis', axis, 'use_cudnn',
                                        False)
        return core.ops.log(outs_softmax)

    if dtype is None:
        check_variable_and_dtype(
            input, 'input', ['float16', 'float32', 'float64'], 'log_softmax')

    helper = LayerHelper("log_softmax", **locals())

    # Static graph path: optionally cast the input to the requested dtype.
    outs_cast = input
    if dtype is not None:
        outs_cast = helper.create_variable_for_type_inference(dtype)
        helper.append_op(
            type='cast',
            inputs={'X': input},
            outputs={'Out': outs_cast},
            attrs={'in_dtype': input.dtype,
                   'out_dtype': dtype})

    outs_softmax = helper.create_variable_for_type_inference(outs_cast.dtype)
    helper.append_op(
        type='softmax',
        inputs={'X': outs_cast},
        outputs={'Out': outs_softmax},
        attrs={'axis': axis,
               'use_cudnn': False})

    outs_log = helper.create_variable_for_type_inference(outs_softmax.dtype)
    helper.append_op(
        type='log', inputs={'X': outs_softmax}, outputs={'Out': outs_log})

    return outs_log