You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
271 lines
12 KiB
271 lines
12 KiB
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from __future__ import print_function
|
|
import warnings
|
|
from .framework import Variable, in_dygraph_mode
|
|
from .layer_helper import LayerHelper
|
|
from .data_feeder import convert_dtype
|
|
|
|
__all__ = ['one_hot', 'embedding']
|
|
|
|
|
|
def one_hot(input, depth, allow_out_of_range=False):
    """
    Convert each id in :attr:`input` into a one-hot vector of length
    :attr:`depth`: the entry at the id's position is 1 and all other entries
    are 0. The output shape is the input shape with ``depth`` appended as a
    new trailing dimension.

    .. code-block:: text

        Example 1 (allow_out_of_range=False):
            input:  X.shape = [4], X.data = [1, 1, 3, 0], depth = 4
            output: Out.shape = [4, 4]
                    Out.data = [[0., 1., 0., 0.],
                                [0., 1., 0., 0.],
                                [0., 0., 0., 1.],
                                [1., 0., 0., 0.]]

        Example 2 (allow_out_of_range=True):
            input:  X.shape = [4], X.data = [1, 1, 5, 0], depth = 4
            output: Out.shape = [4, 4]
                    Out.data = [[0., 1., 0., 0.],
                                [0., 1., 0., 0.],
                                [0., 0., 0., 0.],  # id 5 >= depth: all zeros
                                [1., 0., 0., 0.]]

        Example 3 (allow_out_of_range=False):
            input:  X.shape = [4], X.data = [1, 1, 5, 0], depth = 4
            output: raises an ``Illegal value`` exception, because id 5
                    exceeds depth and out-of-range ids are not allowed.

    Args:
        input(Variable): Tensor or LoDTensor of shape
            :math:`[N_1, N_2, ..., N_k]` with at least one dimension.
            The data type is int32 or int64.
        depth(int): Length of the one-hot dimension; for word ids this is
            typically the dictionary size. May also be a Variable holding
            the depth (static graph only).
        allow_out_of_range(bool): Whether ids may fall outside
            :math:`[0, depth)`. If False, an ``Illegal value`` exception is
            raised for out-of-range ids; if True, such ids produce all-zero
            rows. Default: False.

    Returns:
        Variable: The one-hot representation of :attr:`input`, a Tensor or
        LoDTensor with data type float32.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            # label.shape is [4]; one_hot_label.shape is [4, 4].
            label = fluid.data(name="label", shape=[4], dtype="int64")
            one_hot_label = fluid.one_hot(input=label, depth=4)
    """
    helper = LayerHelper("one_hot_v2", **locals())
    out = helper.create_variable_for_type_inference(dtype='float32')

    if in_dygraph_mode():
        op_inputs = {'X': input}
        op_attrs = {'depth': depth}
    elif isinstance(depth, Variable):
        # Static graph with a tensor-valued depth: feed it as an extra
        # input rather than an attribute, and keep it out of the backward.
        depth.stop_gradient = True
        op_inputs = {'X': input, 'depth_tensor': depth}
        op_attrs = {}
    else:
        # Static graph with a plain int depth: pass it as an attribute.
        op_inputs = {'X': input}
        op_attrs = {'depth': depth}

    helper.append_op(
        type="one_hot_v2",
        inputs=op_inputs,
        attrs=op_attrs,
        outputs={'Out': out},
        stop_gradient=True)
    return out
|
|
|
|
|
|
def embedding(input,
              size,
              is_sparse=False,
              is_distributed=False,
              padding_idx=None,
              param_attr=None,
              dtype='float32'):
    """
    Look up the embedding vectors for the ids given in :attr:`input`.
    A 2-D embedding matrix of shape :attr:`size` (vocab_size, emb_size)
    and type :attr:`dtype` is constructed automatically, and the output
    shape is the input shape with emb_size appended as a trailing dimension.

    **Note:** Every id in :attr:`input` must satisfy
    :math:`0 <= id < size[0]`, otherwise an exception is raised.

    .. code-block:: text

        Case 1:
            input is a Tensor. padding_idx = -1
            input.data = [[1, 3], [2, 4], [4, 127]]
            input.shape = [3, 2]
            Given size = [128, 16]
            output is a Tensor:
                out.shape = [3, 2, 16]
                out.data = [[[0.129435295, ..., 0.436322452],
                             [0.345421456, ..., 0.144534654]],
                            [[0.345249859, ..., 0.194353745],
                             [0.945345345, ..., 0.435345365]],
                            [[0.945345345, ..., 0.435345365],
                             [0.0,         ..., 0.0        ]]]  # padding data
            padding_idx < 0 is converted to padding_idx = -1 + 128 = 127,
            so all-zero rows are emitted wherever the id is 127.

        Case 2:
            input is a LoDTensor with 1-level LoD. padding_idx = 0
            input.lod = [[2, 3]]
            input.data = [[1], [3], [2], [4], [0]]
            input.shape = [5, 1]
            Given size = [128, 16]
            output is a LoDTensor:
                out.lod = [[2, 3]]
                out.shape = [5, 1, 16]
                out.data = [[[0.129435295, ..., 0.436322452]],
                            [[0.345421456, ..., 0.144534654]],
                            [[0.345249859, ..., 0.194353745]],
                            [[0.945345345, ..., 0.435345365]],
                            [[0.0,         ..., 0.0        ]]]  # padding data
            All-zero rows are emitted wherever the id is 0.

    Args:
        input(Variable): Tensor or LoDTensor of type int64 holding the ids.
            Each id must satisfy :math:`0 <= id < size[0]`.
        size(tuple|list): Shape of the lookup-table parameter: two elements,
            the dictionary size and the embedding vector size.
        is_sparse(bool): Whether to use sparse gradient updates. Sparse
            update is faster, but some optimizers do not support it, e.g.
            :ref:`api_fluid_optimizer_AdadeltaOptimizer` ,
            :ref:`api_fluid_optimizer_AdamaxOptimizer` ,
            :ref:`api_fluid_optimizer_DecayedAdagradOptimizer` ,
            :ref:`api_fluid_optimizer_FtrlOptimizer` ,
            :ref:`api_fluid_optimizer_LambOptimizer` and
            :ref:`api_fluid_optimizer_LarsMomentumOptimizer` ; with those,
            is_sparse must be False. Default: False.
        is_distributed(bool): Whether to store the embedding matrix in a
            distributed manner (multi-machine CPU training only).
            Default: False.
        padding_idx(int|long|None): A value in [-vocab_size, vocab_size);
            negative values are converted to :math:`vocab\_size + padding\_idx`.
            Lookups of this id yield all-zero vectors that are not updated
            during training. None disables padding. Default: None.
        param_attr(ParamAttr): Property of the weight parameter; see
            :ref:`api_fluid_ParamAttr`. A pre-trained word-vector matrix
            (a numpy array whose shape matches :attr:`size`) can be loaded
            through :ref:`api_fluid_initializer_NumpyArrayInitializer` —
            see code example 2. Default: None.
        dtype(str|core.VarDesc.VarType): Data type of the output Tensor:
            float32 or float64. Default: float32.

    Returns:
        Variable: Embedding Tensor or LoDTensor mapped from :attr:`input`,
        with the same data type as :attr:`dtype`.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import numpy as np
            data = fluid.data(name='x', shape=[None, 10], dtype='int64')

            # example 1
            emb_1 = fluid.embedding(input=data, size=[128, 64])

            # example 2: load custom or pre-trained word vectors
            weight_data = np.random.random(size=(128, 100))
            w_param_attrs = fluid.ParamAttr(
                name="emb_weight",
                learning_rate=0.5,
                initializer=fluid.initializer.NumpyArrayInitializer(weight_data),
                trainable=True)
            emb_2 = fluid.embedding(input=data, size=(128, 100),
                                    param_attr=w_param_attrs, dtype='float32')
    """
    helper = LayerHelper('embedding', **locals())

    # Validate the id tensor: must be a Variable of type int64.
    if not isinstance(input, Variable):
        raise TypeError(
            "The type of 'input' in fluid.embedding must be Variable, but received %s"
            % (type(input)))
    if convert_dtype(input.dtype) not in ['int64']:
        raise TypeError(
            "The data type of 'input' in fluid.embedding must be int64, but received %s."
            % (convert_dtype(input.dtype)))
    # Validate the output dtype; float16 is accepted but GPU-only.
    if convert_dtype(dtype) in ['float16']:
        warnings.warn(
            "The 'dtype' of fluid.embedding only support float16 in GPU now.")
    if convert_dtype(dtype) not in ['float16', 'float32', 'float64']:
        raise TypeError(
            "The 'dtype' of fluid.embedding must be float16, float32 or float64, but received %s."
            % (convert_dtype(dtype)))

    # Remote prefetch only makes sense for sparse, non-distributed lookup.
    remote_prefetch = is_sparse and (not is_distributed)
    if remote_prefetch:
        assert is_sparse is True and is_distributed is False

    weight = helper.create_parameter(
        attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False)
    out = helper.create_variable_for_type_inference(dtype)

    # Normalize padding_idx: None -> -1 (disabled); negative -> wrap around
    # the vocabulary size.
    if padding_idx is None:
        padding_idx = -1
    elif padding_idx < 0:
        padding_idx = size[0] + padding_idx

    helper.append_op(
        type='lookup_table_v2',
        inputs={'Ids': input,
                'W': weight},
        outputs={'Out': out},
        attrs={
            'is_sparse': is_sparse,
            'is_distributed': is_distributed,
            'remote_prefetch': remote_prefetch,
            'padding_idx': padding_idx
        })
    return out
|