# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
|
|
Contrib layers just related to the neural network.
|
|
"""
|
|
|
|
from __future__ import print_function

import numpy as np
import six
import os
import inspect
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.layers import utils

__all__ = [
    'fused_elemwise_activation',
    'sequence_topk_avg_pooling',
    'var_conv_2d',
    'match_matrix_tensor',
    'tree_conv',
    'fused_embedding_seq_pool',
]


def fused_elemwise_activation(x,
                              y,
                              functor_list,
                              axis=-1,
                              scale=0.0,
                              save_intermediate_out=True):
    """
    **Fused elementwise_add/mul and activation layers**

    This function computes an elementwise_add/mul combined with an activation.

    .. math::

        out = Unary(Binary(x, y))

    or

    .. math::

        out = Binary(x, Unary(y))

    Unary operators can be: `scale`, `relu`, `tanh`. Binary operators can be:
    `elementwise_add`, `elementwise_mul`.

    Args:
        x (Variable): the left operand of the binary operator.
        y (Variable): the right operand of the binary operator.
        functor_list (list of str): types of the operators that will be executed
            by this layer. For example, ['elementwise_add', 'relu']
            (out = elementwise_add(x, relu(y))),
            or ['relu', 'elementwise_add'] (out = relu(elementwise_add(x, y))).
        axis (int32, default -1): axis of the elementwise op.
        scale (float32, default 0): parameter of the scale op.
        save_intermediate_out (bool, default True): whether to save the
            intermediate result, Unary(y) or Binary(x, y).

    Returns:
        Variable: The computation result.
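
    Examples:
        A minimal usage sketch; the input names ``a`` and ``b`` are
        illustrative placeholders, not part of the API:

        .. code-block:: python

            import paddle.fluid as fluid

            a = fluid.layers.data(name='a', shape=[10], dtype='float32')
            b = fluid.layers.data(name='b', shape=[10], dtype='float32')
            # computes out = elementwise_add(a, relu(b))
            out = fluid.contrib.fused_elemwise_activation(
                x=a, y=b, functor_list=['elementwise_add', 'relu'])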
"""
    if isinstance(functor_list, str):
        functor_list = functor_list.split(',')

    if not isinstance(functor_list, list) or len(functor_list) != 2:
        raise ValueError(
            'functor_list should be a list of str, and the length should be 2.')

    helper = LayerHelper('fused_elemwise_activation', **locals())
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    intermediate_out = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(
        type='fused_elemwise_activation',
        inputs={'X': x,
                'Y': y},
        outputs={'Out': out,
                 'IntermediateOut': intermediate_out},
        attrs={
            'axis': axis,
            'scale': scale,
            'save_intermediate_out': save_intermediate_out,
            'functor_list': functor_list
        })
    return out


def var_conv_2d(input,
                row,
                col,
                input_channel,
                output_channel,
                filter_size,
                stride=1,
                param_attr=None,
                act=None,
                dtype='float32',
                name=None):
    """
    The var_conv_2d layer calculates the output based on the :attr:`input` with variable
    length, row, col, input channel, filter size and strides. :attr:`input`, :attr:`row`,
    and :attr:`col` should all be 1-level LoDTensors. The convolution operation is the same
    as the conv2d layer with padding. Besides, input.dims[1] should be 1.

    .. code-block:: text

        If input_channel is 2 and given row lodTensor and col lodTensor as follows:
            row.lod = [[5, 4]]
            col.lod = [[6, 7]]
        input is a lodTensor:
            input.lod = [[60, 56]]  # where 60 = input_channel * 5 * 6
            input.dims = [116, 1]   # where 116 = 60 + 56

        If output_channel is set to 3, filter_size to [3, 3] and stride to [1, 1]:
            output.lod = [[90, 84]] # where 90 = output_channel * [(5-1)/stride + 1] * [(6-1)/stride + 1]
            output.dims = [174, 1]  # where 174 = 90 + 84

    Args:
        input (Variable): The input should be a 1-level LoDTensor with dims[1] equal to 1.
        row (Variable): The row should be a 1-level LoDTensor that provides height information.
        col (Variable): The col should be a 1-level LoDTensor that provides width information.
        input_channel (int): The number of input channels.
        output_channel (int): The number of output channels.
        filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
            it must contain two integers, (filter_size_H, filter_size_W).
            Otherwise, the filter will be a square.
        stride (int|tuple): The stride size. If stride is a tuple, it must
            contain two integers, (stride_H, stride_W). Otherwise,
            stride_H = stride_W = stride. Default: stride = 1.
        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
            of var_conv_2d. If it is set to None or one attribute of ParamAttr, var_conv_2d
            will create ParamAttr as param_attr. If the Initializer of the param_attr
            is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
            and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
        act (str): Activation type. If it is set to None, no activation is appended.
            Default: None.
        dtype (str, default 'float32'): The data type of the parameters and output.
        name (str|None): A name for this layer (optional). If set to None, the layer
            will be named automatically. Default: None.

    Returns:
        Variable: Output variable with LoD specified by this layer.

    Examples:
        .. code-block:: python

            import numpy as np
            from paddle.fluid import layers
            from paddle.fluid import contrib

            x_lod_tensor = layers.data(name='x', shape=[1], lod_level=1)
            row_lod_tensor = layers.data(name='row', shape=[6], lod_level=1)
            col_lod_tensor = layers.data(name='col', shape=[6], lod_level=1)
            out = contrib.var_conv_2d(input=x_lod_tensor,
                                      row=row_lod_tensor,
                                      col=col_lod_tensor,
                                      input_channel=3,
                                      output_channel=5,
                                      filter_size=[3, 3],
                                      stride=1)
    """
    helper = LayerHelper('var_conv_2d', **locals())
    x_shape = list(input.shape)
    assert len(x_shape) == 2

    filter_size = utils.convert_to_list(filter_size, 2, 'filter_size')
    stride = utils.convert_to_list(stride, 2, 'stride')

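    # The filter is stored as a 2-D matrix with one row per output channel;
    # each row holds an unrolled input_channel * KernelH * KernelW kernel.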
    filter_shape = [
        int(output_channel),
        int(input_channel) * filter_size[0] * filter_size[1]
    ]
    filter_param = helper.create_parameter(
        attr=helper.param_attr,
        shape=filter_shape,
        dtype=dtype, )

    conv_res = helper.create_variable_for_type_inference(dtype)
    tmp_res = helper.create_variable_for_type_inference(
        dtype, stop_gradient=True)

    helper.append_op(
        type='var_conv_2d',
        inputs={
            'X': input,
            'ROW': row,
            'COLUMN': col,
            'W': filter_param,
        },
        outputs={"Out": conv_res,
                 "Col": tmp_res},
        attrs={
            'InputChannel': input_channel,
            'OutputChannel': output_channel,
            'StrideH': stride[0],
            'StrideW': stride[1],
            'KernelH': filter_size[0],
            'KernelW': filter_size[1],
        })

    return helper.append_activation(conv_res)


def match_matrix_tensor(x,
                        y,
                        channel_num,
                        act=None,
                        param_attr=None,
                        dtype='float32',
                        name=None):
    """
    Calculate the semantic matching matrix of two word sequences with variable length.
    Given a query A of length `n` and a title B of length `m`, the input shapes are
    [n, h] and [m, h] respectively, where h is hidden_size. If :attr:`channel_num` is set to 3,
    it will generate a learnable parameter matrix W with shape [h, 3, h].
    Then the semantic matching matrix of query A and title B is calculated by
    A * W * B.T = [n, h]*[h, 3, h]*[h, m] = [n, 3, m]. The learnable parameter matrix `W`
    is equivalent to a fully connected layer in the calculation process. If :attr:`act` is
    provided, the corresponding activation function will be applied to the output matrix.
    Both :attr:`x` and :attr:`y` should be LoDTensors, and only one level of LoD is supported.

    .. code-block:: text

        Given a 1-level LoDTensor x:
            x.lod = [[2, 3, ]]
            x.data = [[0.3, 0.1], [0.2, 0.3], [0.5, 0.6], [0.7, 0.1], [0.3, 0.4]]
            x.dims = [5, 2]
        and a 1-level LoDTensor y:
            y.lod = [[3, 1, ]]
            y.data = [[0.1, 0.2], [0.3, 0.7], [0.9, 0.2], [0.4, 0.1]]
            y.dims = [4, 2]
        if channel_num is set to 2, then we get a 1-level LoDTensor:
            out.lod = [[12, 6]]  # where 12 = channel_num * x.lod[0][0] * y.lod[0][0]
            out.dims = [18, 1]   # where 18 = 12 + 6

    Args:
        x (Variable): Input variable x, which should be a 1-level LoDTensor.
        y (Variable): Input variable y, which should be a 1-level LoDTensor.
        channel_num (int): The channel number of the learnable parameter W.
        act (str, default None): Activation to be applied to the output of this layer.
        param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for
            learnable parameters/weights of this layer.
        dtype (str, default 'float32'): The data type of the parameter W.
        name (str|None): A name for this layer (optional). If set to None, the layer will
            be named automatically. Default: None.

    Returns:
        Tuple of Variables: the output matching matrix with LoD specified by this layer,
        and an intermediate result variable.

    Examples:
        .. code-block:: python

            import numpy as np
            from paddle.fluid import layers
            from paddle.fluid import contrib

            x_lod_tensor = layers.data(name='x', shape=[10], lod_level=1)
            y_lod_tensor = layers.data(name='y', shape=[10], lod_level=1)
            out, out_tmp = contrib.match_matrix_tensor(x=x_lod_tensor, y=y_lod_tensor, channel_num=3)
    """
    helper = LayerHelper('match_matrix_tensor', **locals())

    x_shape = list(x.shape)
    y_shape = list(y.shape)
    assert len(x_shape) == 2 and len(y_shape) == 2 and x_shape[-1] == y_shape[
        -1]

    weight_shape = [x_shape[-1], channel_num, y_shape[-1]]
    w = helper.create_parameter(
        attr=helper.param_attr, shape=weight_shape, dtype=dtype, is_bias=False)
    mm_res = helper.create_variable_for_type_inference(dtype)
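    # 'Tmp' holds an intermediate result of the op (presumably the product of
    # x and W); gradients do not flow through it, hence stop_gradient=True.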
    tmp_res = helper.create_variable_for_type_inference(
        dtype, stop_gradient=True)
    helper.append_op(
        type='match_matrix_tensor',
        inputs={
            'X': x,
            'Y': y,
            'W': w,
        },
        outputs={"Out": mm_res,
                 "Tmp": tmp_res},
        attrs={'dim_t': channel_num})

    return helper.append_activation(mm_res), tmp_res


def sequence_topk_avg_pooling(input, row, col, topks, channel_num):
    """
    The :attr:`topks` is a list of incremental values. For each topk, this layer
    averages the top-k features as one output feature for each channel of every
    input sequence. Both :attr:`row` and :attr:`col` are LoDTensors, which provide
    the height and width information for the :attr:`input` tensor. If the feature
    size of an input sequence is less than topk, zeros are padded at the end.

    .. code-block:: text

        If channel_num is 2 and given row LoDTensor and col LoDTensor as follows:
            row.lod = [[5, 4]]
            col.lod = [[6, 7]]

        input is a LoDTensor with input.lod[0][i] = channel_num * row.lod[0][i] * col.lod[0][i]
            input.lod = [[60, 56]]  # where 60 = channel_num * 5 * 6
            input.dims = [116, 1]   # where 116 = 60 + 56

        If topks is [1, 3, 5], then we get a 1-level LoDTensor:
            out.lod = [[5, 4]]  # shares LoD info with the row LoDTensor
            out.dims = [9, 6]   # where 6 = len(topks) * channel_num

    Args:
        input (Variable): The input should be a 2-D LoDTensor with dims[1] equal to 1.
        row (Variable): The row should be a 1-level LoDTensor that provides the height
            information of the input tensor data.
        col (Variable): The col should be a 1-level LoDTensor that provides the width
            information of the input tensor data.
        topks (list): A list of incremental values used to average the top-k features.
        channel_num (int): The number of input channels.

    Returns:
        Variable: The output LoDTensor specified by this layer.

    Examples:

        .. code-block:: python

            import numpy as np
            from paddle.fluid import layers
            from paddle.fluid import contrib

            x_lod_tensor = layers.data(name='x', shape=[1], lod_level=1)
            row_lod_tensor = layers.data(name='row', shape=[6], lod_level=1)
            col_lod_tensor = layers.data(name='col', shape=[6], lod_level=1)
            out = contrib.sequence_topk_avg_pooling(input=x_lod_tensor,
                                                    row=row_lod_tensor,
                                                    col=col_lod_tensor,
                                                    topks=[1, 3, 5],
                                                    channel_num=5)
    """
    helper = LayerHelper('sequence_topk_avg_pooling', **locals())
    out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
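    # 'pos' records the positions of the selected top-k elements; it is an
    # auxiliary output (stop_gradient=True), presumably consumed by the
    # backward pass rather than returned to the caller.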
    pos = helper.create_variable_for_type_inference(
        dtype=helper.input_dtype(), stop_gradient=True)
    helper.append_op(
        type='sequence_topk_avg_pooling',
        inputs={'X': input,
                'ROW': row,
                'COLUMN': col},
        outputs={'Out': out,
                 'pos': pos},
        attrs={'topks': topks,
               'channel_num': channel_num})

    return out


def tree_conv(nodes_vector,
              edge_set,
              output_size,
              num_filters=1,
              max_depth=2,
              act='tanh',
              param_attr=None,
              bias_attr=None,
              name=None):
    """
    ${comment}

    Args:
        nodes_vector(${nodes_vector_type}): ${nodes_vector_comment}
        edge_set(${edge_set_type}): ${edge_set_comment}
        output_size(int): output feature width.
        num_filters(int): number of filters. Default: 1.
        max_depth(int): max depth of filters. Default: 2.
        act(str): activation function. Default: 'tanh'.
        param_attr(ParamAttr): the parameter attribute for the filters. Default: None.
        bias_attr(ParamAttr): the parameter attribute for the bias of this layer. Default: None.
        name(str): a name of this layer (optional). If set to None, the layer will be
            named automatically. Default: None.

    Returns:
        out(${out_type}): ${out_comment}

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            # 10 for max_node_size of dataset, 5 for vector width
            nodes_vector = fluid.layers.data(name='vectors', shape=[10, 5], dtype='float32')
            # 10 for max_node_size of dataset, 2 because every edge has two nodes
            # edges must be directional
            edge_set = fluid.layers.data(name='edge_set', shape=[10, 2], dtype='float32')
            # the shape of output will be [10, 6, 1],
            # 10 for max_node_size of dataset, 6 for output size, 1 for 1 filter
            out_vector = fluid.contrib.layers.tree_conv(nodes_vector, edge_set, 6, 1, 2)
            # After reshape, the output tensor can serve as nodes_vector for the next tree convolution
            out_vector = fluid.layers.reshape(out_vector, shape=[-1, 10, 6])
            out_vector_2 = fluid.contrib.layers.tree_conv(out_vector, edge_set, 3, 4, 2)
            # the output tensor can also be pooled (the pooling in the paper is called global pooling)
            pooled = fluid.layers.reduce_max(out_vector, dim=2)  # global pooling
    """
    helper = LayerHelper("tree_conv", **locals())
    dtype = helper.input_dtype('nodes_vector')
    feature_size = nodes_vector.shape[2]
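    # The 3 in W_shape gives each filter three weight slices; in the TBCNN
    # formulation these plausibly correspond to the top, left and right weight
    # matrices of the continuous binary tree (an interpretation, not stated
    # in this file).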
    W_shape = [feature_size, 3, output_size, num_filters]
    W = helper.create_parameter(
        attr=param_attr, shape=W_shape, dtype=dtype, is_bias=False)
    out = helper.create_variable_for_type_inference(dtype=dtype)
    helper.append_op(
        type='tree_conv',
        inputs={'NodesVector': nodes_vector,
                'EdgeSet': edge_set,
                'Filter': W},
        outputs={'Out': out, },
        attrs={'max_depth': max_depth})
    if helper.bias_attr:
        pre_activation = helper.append_bias_op(out)
    else:
        pre_activation = out
    return helper.append_activation(pre_activation)


def fused_embedding_seq_pool(input,
                             size,
                             is_sparse=False,
                             padding_idx=None,
                             combiner='sum',
                             param_attr=None,
                             dtype='float32'):
    """
    **Embedding Sequence Pool**

    This layer is the fusion of lookup table and sequence_pool.

    Args:
        input (Variable): Input is a Tensor<int64> Variable, which contains the IDs' information.
            The values of the input IDs should satisfy :math:`0 <= id < size[0]`.
        size (tuple|list): The shape of the lookup_table parameter. It should
            have two elements which indicate the size of the dictionary of
            embeddings and the size of each embedding vector respectively.
        is_sparse (bool): The flag indicating whether to use sparse update.
            Default: False.
        padding_idx (int|long|None): It will output all-zero padding data whenever
            lookup encounters :math:`padding\_idx` in Ids. If set to :attr:`None`, it has
            no effect on the output. If :math:`padding\_idx < 0`, the :math:`padding\_idx`
            will automatically be converted to :math:`size[0] + padding\_idx` before use.
            Default: None.
        combiner (str): The pooling type of sequence_pool; only `sum` is supported.
            Default: 'sum'.
        param_attr (ParamAttr): Parameters for this layer.
        dtype (np.dtype|core.VarDesc.VarType|str): The dtype refers to the data type of the
            output tensor. It can be float32, float16, int, etc.
    Returns:
        The sequence pooling variable, which is a Tensor.
    Examples:
        .. code-block:: python

            import numpy as np
            import paddle.fluid as fluid

            dict_size = 20
            data_t = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
            padding_idx = np.random.randint(1, 10)
            out = fluid.contrib.fused_embedding_seq_pool(
                input=data_t,
                size=[dict_size, 32],
                param_attr='w',
                padding_idx=padding_idx,
                is_sparse=False)
    """
    helper = LayerHelper('fused_embedding_seq_pool', **locals())
    w = helper.create_parameter(
        attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False)
    out = helper.create_variable_for_type_inference(dtype)
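    # Normalize padding_idx to the convention the op expects: -1 disables
    # padding, and a negative index counts back from the end of the
    # vocabulary (size[0]), as documented above.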
    if padding_idx is None:
        padding_idx = -1
    elif padding_idx < 0:
        padding_idx = size[0] + padding_idx
    helper.append_op(
        type='fused_embedding_seq_pool',
        inputs={'Ids': input,
                'W': w},
        outputs={'Out': out},
        attrs={
            'is_sparse': is_sparse,
            'combiner': combiner,
            'padding_idx': padding_idx
        })
    return out