@@ -225,11 +225,11 @@ def embedding(input,
             have two elements which indicate the size of the dictionary of
             embeddings and the size of each embedding vector respectively.
         is_sparse(bool): The flag indicating whether to use sparse update.
-        is_distributed (bool): Whether to run lookup table from remote parameter server.
+        is_distributed(bool): Whether to run the lookup table on a remote parameter server.
         padding_idx(int|long|None): If :attr:`None`, it has no effect on lookup.
             Otherwise the given :attr:`padding_idx` indicates padding the output
             with zeros whenever lookup encounters it in :attr:`input`. If
-            :math:`padding_idx < 0`, the padding_idx to use in lookup is
+            :math:`padding_idx < 0`, the :attr:`padding_idx` to use in lookup is
             :math:`size[0] + padding\_idx`.
         param_attr(ParamAttr): Parameters for this layer.
         dtype(np.dtype|core.VarDesc.VarType|str): The data type: float32, float16, int, etc.
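For quick reference, a minimal sketch of the padding_idx behavior documented in this hunk, in the doctest style this change set uses elsewhere (variable names are illustrative; the call assumes the embedding signature shown above):

>>> import paddle.fluid as fluid
>>> ids = fluid.layers.data(name='ids', shape=[1], dtype='int64')
>>> # rows looked up for id 0 are returned as all zeros
>>> emb = fluid.layers.embedding(input=ids, size=[128, 64], padding_idx=0)
>>> # padding_idx=-1 would resolve to size[0] + padding_idx == 127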
@@ -1235,14 +1235,17 @@ def conv2d(input,
            act=None,
            name=None):
     """
     **Convolution2D Layer**
 
     The convolution2D layer calculates the output based on the input, filter
-    and strides, paddings, dilations, groups parameters. Input(Input) and
-    Output(Output) are in NCHW format. Where N is batch size, C is the number of
+    and strides, paddings, dilations, groups parameters. Input and
+    Output are in NCHW format, where N is batch size, C is the number of
     channels, H is the height of the feature, and W is the width of the feature.
-    The details of convolution layer, please refer UFLDL's `convolution,
-    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
+    Filter is in MCHW format, where M is the number of output image channels,
+    C is the number of input image channels, H is the height of the filter,
+    and W is the width of the filter. If groups is greater than 1,
+    C will equal the number of input image channels divided by the groups.
+    Please refer to UFLDL's `convolution
+    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
+    for more details.
     If a bias attribute and activation type are provided, bias is added to the
     output of the convolution, and the corresponding activation function is
     applied to the final result.
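A quick sanity check of the NCHW/MCHW shape conventions and the groups rule described above (plain Python, illustrative values):

>>> N, C, H, W = 8, 16, 32, 32             # input tensor in NCHW format
>>> M, groups = 64, 4                      # output channels and group count
>>> filter_shape = (M, C // groups, 3, 3)  # MCHW: per-group input channels
>>> filter_shape
(64, 4, 3, 3)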
@@ -1253,15 +1256,14 @@ def conv2d(input,
 
         Out = \sigma (W \\ast X + b)
 
-    In the above equation:
+    Where:
 
     * :math:`X`: Input value, a tensor with NCHW format.
     * :math:`W`: Filter value, a tensor with MCHW format.
     * :math:`\\ast`: Convolution operation.
     * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
     * :math:`\\sigma`: Activation function.
-    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be
-      different.
+    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
 
     Example:
 
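As a concrete instance of the equation above, a 1 x 1 convolution at a single spatial position reduces to a matrix-vector product. A NumPy sketch, with ReLU standing in for :math:`\sigma` (shapes are illustrative):

>>> import numpy as np
>>> X = np.random.rand(3, 1)        # C_in input values at one position
>>> W = np.random.rand(2, 3)        # M x C_in weights of a 1 x 1 filter
>>> b = np.random.rand(2, 1)        # bias with shape [M, 1]
>>> Out = np.maximum(W @ X + b, 0)  # \sigma(W \ast X + b), \sigma = ReLU
>>> Out.shape
(2, 1)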
@@ -1272,6 +1274,7 @@ def conv2d(input,
           Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
 
         - Output:
 
           Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
 
         Where
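The hunk cuts off just before the :math:`H_{out}`/:math:`W_{out}` definitions. Assuming the standard formula :math:`H_{out} = (H + 2 \cdot padding - dilation \cdot (H_f - 1) - 1) / stride + 1`, a quick arithmetic check that a 3 x 3 filter with padding 1 and stride 1 preserves the height:

>>> H, H_f, padding, stride, dilation = 32, 3, 1, 1, 1
>>> (H + 2 * padding - dilation * (H_f - 1) - 1) // stride + 1
32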
@@ -1283,7 +1286,7 @@ def conv2d(input,
 
     Args:
         input (Variable): The input image with [N, C, H, W] format.
-        num_filters(int): The number of filter. It is as same as the output
+        num_filters(int): The number of filters. It is the same as the output
             image channels.
         filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
             it must contain two integers, (filter_size_H, filter_size_W).
@@ -1306,7 +1309,8 @@ def conv2d(input,
         bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None
         use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
             library is installed. Default: True
-        use_mkldnn (bool): Use mkldnn kernels or not.
+        use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled
+            with the mkldnn library. Default: False
         act (str): Activation type. Default: None
         name (str|None): A name for this layer (optional). If set None, the layer
             will be named automatically.
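Pulling these arguments together, a minimal conv2d call in the doctest style this change set uses elsewhere (assuming the signature documented above):

>>> import paddle.fluid as fluid
>>> data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
>>> conv = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act='relu')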
@@ -1974,6 +1978,7 @@ def batch_norm(input,
     return helper.append_activation(batch_norm_out)
 
 
+@templatedoc()
 def layer_norm(input,
                scale=True,
                shift=True,
@@ -1984,20 +1989,11 @@ def layer_norm(input,
                act=None,
                name=None):
     """
-    **Layer Normalization**
-
-    Assume feature vectors exist on dimensions
-    :attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics
-    along these dimensions for each feature vector :math:`a` with size
-    :math:`H`, then normalize each feature vector using the corresponding
-    statistics. After that, apply learnable gain and bias on the normalized
-    tensor to scale and shift if :attr:`scale` and :attr:`shift` are set.
-
-    Refer to `Layer Normalization <https://arxiv.org/pdf/1607.06450v1.pdf>`_
+    ${comment}
 
     The formula is as follows:
 
-    .. math::
+    .. math::
 
         \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i
 
@@ -2005,6 +2001,15 @@ def layer_norm(input,
 
         h & = f(\\frac{g}{\\sigma}(a - \\mu) + b)
 
+    * :math:`a`: the vector representation of the summed inputs to the neurons
+      in that layer.
+
+    * :math:`H`: the number of hidden units in a layer.
+
+    * :math:`g`: the trainable scale parameter.
+
+    * :math:`b`: the trainable bias parameter.
+
     Args:
         input(Variable): The input tensor variable.
         scale(bool): Whether to learn the adaptive gain :math:`g` after
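To make the statistics concrete, a NumPy sketch of the formulas above for a single feature vector :math:`a` of size :math:`H`, taking :math:`f` as the identity (values are illustrative):

>>> import numpy as np
>>> a = np.random.rand(8)           # H = 8 hidden units
>>> g, b = np.ones(8), np.zeros(8)  # trainable gain and bias
>>> mu, sigma = a.mean(), a.std()   # moment statistics of the vector
>>> h = g / sigma * (a - mu) + b    # f((g / sigma) * (a - mu) + b), f = identity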
@@ -2023,14 +2028,13 @@ def layer_norm(input,
         name (str): The name of this layer. It is optional.
 
     Returns:
-        Variable: A tensor variable with the same shape as the input.
+        ${y_comment}
 
     Examples:
-        .. code-block:: python
-
-            data = fluid.layers.data(
-                name='data', shape=[3, 32, 32], dtype='float32')
-            x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
+        >>> data = fluid.layers.data(name='data', shape=[3, 32, 32],
+        >>>                          dtype='float32')
+        >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
     """
     helper = LayerHelper('layer_norm', **locals())
     dtype = helper.input_dtype()
@@ -3739,29 +3743,13 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
     return out
 
 
+@templatedoc()
 def row_conv(input, future_context_size, param_attr=None, act=None):
-    """Row Conv Operator. This layer will apply lookahead convolution to
-    **input**. The input variable should be a 2D LoDTensor with shape [T, D].
-    Parameters with shape [future_context_size + 1, D] will be created. The math
-    equation of row convolution is as follows:
-
-    .. math::
-        Out_{i} = \sum_{j = i} ^ {i + \\tau} X_{j} \odot W_{i - j}
-
-    In the above equation:
-
-    * :math:`Out_{i}`: The i-th row of output variable with shape [1, D].
-    * :math:`\\tau`: Future context size.
-    * :math:`X_{j}`: The j-th row of input variable with shape [1, D].
-    * :math:`W_{i-j}`: The (i-j)-th row of parameters with shape [1, D].
-
-    More details about row_conv please refer to the paper \
-    (http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf) and
-    the design document \
-    (https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645).
+    """
+    ${comment}
 
     Args:
-        input (Variable): Input variable, a 2D LoDTensor with shape [T, D].
+        input (${x_type}): ${x_comment}.
         future_context_size (int): Future context size. Please note, the shape
             of convolution kernel is [future_context_size + 1, D].
         param_attr (ParamAttr): Attributes of parameters, including
@@ -3769,14 +3757,13 @@ def row_conv(input, future_context_size, param_attr=None, act=None):
         act (str): Non-linear activation to be applied to output variable.
 
     Returns:
-        Variable: The output tensor with same shape as input tensor.
+        ${out_comment}.
 
     Examples:
-        .. code-block:: python
-
-            x = fluid.layers.data(name='x', shape=[16],
-                                  dtype='float32', lod_level=1)
-            out = fluid.layers.row_conv(input=x, future_context_size=2)
+        >>> import paddle.fluid as fluid
+        >>> x = fluid.layers.data(name='x', shape=[16],
+        >>>                       dtype='float32', lod_level=1)
+        >>> out = fluid.layers.row_conv(input=x, future_context_size=2)
     """
     helper = LayerHelper('row_conv', **locals())
     dtype = helper.input_dtype()
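A NumPy sketch of the lookahead equation from the removed docstring, reading the parameter row index as :math:`j - i` so it stays within [0, tau], and zero-padding past the end of the sequence (illustrative sizes):

>>> import numpy as np
>>> T, D, tau = 5, 4, 2
>>> X = np.random.rand(T, D)           # 2D LoDTensor payload, shape [T, D]
>>> W = np.random.rand(tau + 1, D)     # [future_context_size + 1, D]
>>> Out = np.zeros((T, D))
>>> for i in range(T):
...     for j in range(i, min(i + tau + 1, T)):
...         Out[i] += X[j] * W[j - i]  # elementwise product per row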
@@ -3792,42 +3779,23 @@ def row_conv(input, future_context_size, param_attr=None, act=None):
     return helper.append_activation(out)
 
 
+@templatedoc()
 def multiplex(inputs, index):
     """
-    **Multiplex Layer**
-
-    Referring to the given index variable, this layer selects rows from the
-    input variables to construct a multiplex variable. Assuming that there are
-    :math:`m` input variables and :math:`I_i` represents the i-th input
-    variable and :math:`i` is in [0, :math:`m`). All input variables are
-    tensors with same shape [:math:`d_0`, :math:`d_1`, ..., :math:`d_R`].
-    Please note that rank of the input tensor should be at least 2. Each input
-    variable will be treated as a 2-D matrix with shape [:math:`M`, :math:`N`]
-    where :math:`M` for :math:`d_0` and :math:`N` for :math:`d_1` * :math:`d_2`
-    * ... * :math:`d_R`. Let :math:`I_i[j]` be the j-th row of the i-th input
-    variable. The given index variable should be a 2-D tensor with shape
-    [:math:`M`, 1]. Let `ID[i]` be the i-th index value of the index variable.
-    Then the output variable will be a tensor with shape [:math:`d_0`,
-    :math:`d_1`, ..., :math:`d_R`]. If we treat the output tensor as a 2-D
-    matrix with shape [:math:`M`, :math:`N`] and let :math:`O[i]` be the i-th
-    row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`.
+    ${comment}
 
+    >>> import paddle.fluid as fluid
+    >>> x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32')
+    >>> x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32')
+    >>> index = fluid.layers.data(name='index', shape=[1], dtype='int32')
+    >>> out = fluid.layers.multiplex(inputs=[x1, x2], index=index)
 
     Args:
-        inputs (list): A list of variables to gather from. All variables have the
-            same shape and the rank is at least 2.
-        index (Variable): Tensor<int32>, index variable which is a 2-D tensor
-            with shape [M, 1] where M is the batch size.
+        inputs (list): ${x_comment}.
+        index (${ids_type}): ${ids_comment}.
 
     Returns:
-        Variable: Multiplex variable gathered from input variables.
-
-    Examples:
-        .. code-block:: python
-
-            x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32')
-            x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32')
-            index = fluid.layers.data(name='index', shape=[1], dtype='int32')
-            out = fluid.layers.multiplex(inputs=[x1, x2], index=index)
+        ${out_comment}.
     """
     helper = LayerHelper('multiplex', **locals())
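The selection rule :math:`O[i] = I_{ID[i]}[i]` that the removed description spelled out, as a NumPy sketch (illustrative sizes; m = 2 inputs, M = 3 rows):

>>> import numpy as np
>>> I = [np.random.rand(3, 4), np.random.rand(3, 4)]  # the m input matrices
>>> ID = [1, 0, 1]                                    # index column, one entry per row
>>> O = np.stack([I[ID[i]][i] for i in range(3)])     # O[i] == I[ID[i]][i]
>>> O.shape
(3, 4)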