|
|
|
|
@ -1357,29 +1357,72 @@ class Embedding(layers.Layer):
|
|
|
|
|
"""
|
|
|
|
|
**Embedding Layer**
|
|
|
|
|
|
|
|
|
|
This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
|
|
|
|
|
a lookup table. The result of this lookup is the embedding of each ID in the
|
|
|
|
|
:attr:`input`.
|
|
|
|
|
All the input variables are passed in as local variables to the LayerHelper constructor
|
|
|
|
|
This interface is used to construct a callable object of the ``Embedding`` class.
|
|
|
|
|
For specific usage, refer to code examples. It implements the function of the Embedding Layer.
|
|
|
|
|
This layer is used to look up the embedding vectors of the ids provided by :attr:`input` .
|
|
|
|
|
It automatically constructs a 2D embedding matrix based on the
|
|
|
|
|
input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` .
|
|
|
|
|
|
|
|
|
|
This layer requires the last dimension of the input Tensor shape to be equal to 1. The shape
|
|
|
|
|
of output Tensor is generated by replacing the last dimension of the input Tensor shape
|
|
|
|
|
with emb_size.
|
|
|
|
|
|
|
|
|
|
The id in :attr:`input` must satisfy :math:`0 \leq id < size[0]` ,
|
|
|
|
|
otherwise the program will throw an exception and exit.
|
|
|
|
|
|
|
|
|
|
.. code-block:: text
|
|
|
|
|
|
|
|
|
|
Case 1:
|
|
|
|
|
|
|
|
|
|
input is a Tensor. padding_idx = -1
|
|
|
|
|
input.data = [[[1], [3]], [[2], [4]], [[4], [127]]]
|
|
|
|
|
input.shape = [3, 2, 1]
|
|
|
|
|
Given size = [128, 16]
|
|
|
|
|
output is a Tensor:
|
|
|
|
|
out.shape = [3, 2, 16]
|
|
|
|
|
out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
|
|
|
|
|
[0.345421456, 0.524563927, ..., 0.144534654]],
|
|
|
|
|
|
|
|
|
|
[[0.345249859, 0.124939536, ..., 0.194353745],
|
|
|
|
|
[0.945345345, 0.435394634, ..., 0.435345365]],
|
|
|
|
|
|
|
|
|
|
[[0.945345345, 0.435394634, ..., 0.435345365],
|
|
|
|
|
[0.0, 0.0, ..., 0.0 ]]] # padding data
|
|
|
|
|
Because the input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127
|
|
|
|
|
It will output all-zero padding data whenever the id is 127.
|
|
|
|
|
|
|
|
|
|
Parameters:
|
|
|
|
|
name_scope(str): The name of this class.
|
|
|
|
|
size(tuple|list): The shape of the look up table parameter. It should have two elements which indicate the size
|
|
|
|
|
of the dictionary of embeddings and the size of each embedding vector respectively.
|
|
|
|
|
is_sparse(bool): The flag indicating whether to use sparse update. Default: False
|
|
|
|
|
is_distributed(bool): Whether to run lookup table from remote parameter server. Default: False.
|
|
|
|
|
padding_idx(int|long|None): If :attr:`None`, it has no effect on the lookup.
|
|
|
|
|
Otherwise the given :attr:`padding_idx` indicates padding the output with zeros whenever lookup encounters
|
|
|
|
|
it in :attr:`input`. If :math:`padding_idx < 0`, the :attr:`padding_idx` to use in lookup is :math:`size[0] + padding_idx`. Default: None.
|
|
|
|
|
param_attr(ParamAttr): Parameters for this layer. Default: None.
|
|
|
|
|
dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32, float16, int etc. Default: 'float32'.
|
|
|
|
|
is_sparse(bool): The flag indicating whether to use sparse update. This parameter only
|
|
|
|
|
affects the performance of the backwards gradient update. It is recommended to set
|
|
|
|
|
True because sparse update is faster. But some optimizers do not support sparse update,
|
|
|
|
|
such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` ,
|
|
|
|
|
:ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` ,
|
|
|
|
|
:ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` .
|
|
|
|
|
In these cases, is_sparse must be False. Default: False.
|
|
|
|
|
is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used
|
|
|
|
|
in multi-machine distributed CPU training. Default: False.
|
|
|
|
|
padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
|
|
|
|
|
If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
|
|
|
|
|
to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup
|
|
|
|
|
encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
|
|
|
|
|
If set to None, it has no effect on the output. Default: None.
|
|
|
|
|
param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
|
|
|
|
|
default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition,
|
|
|
|
|
user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
|
|
|
|
|
The local word vector needs to be transformed into numpy format, and the shape of local word
|
|
|
|
|
vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
|
|
|
|
|
is used to load custom or pre-trained word vectors. See code example 2 for details.
|
|
|
|
|
dtype(np.dtype|core.VarDesc.VarType|str): It refers to the data type of output Tensor.
|
|
|
|
|
It must be "float32" or "float64". Default: "float32".
|
|
|
|
|
|
|
|
|
|
Attributes:
|
|
|
|
|
weight (Parameter): the learnable weights of this layer.
|
|
|
|
|
Attribute:
|
|
|
|
|
**weight** (Parameter): the learnable weights of this layer.
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Variable: The tensor variable storing the embeddings of the \
|
|
|
|
|
supplied inputs.
|
|
|
|
|
Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` .
|
|
|
|
|
|
|
|
|
|
Examples:
|
|
|
|
|
|
|
|
|
|
@ -1389,6 +1432,7 @@ class Embedding(layers.Layer):
|
|
|
|
|
import paddle.fluid.dygraph.base as base
|
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
|
# example 1
|
|
|
|
|
inp_word = np.array([[[1]]]).astype('int64')
|
|
|
|
|
dict_size = 20
|
|
|
|
|
with fluid.dygraph.guard():
|
|
|
|
|
@ -1398,6 +1442,21 @@ class Embedding(layers.Layer):
|
|
|
|
|
param_attr='emb.w',
|
|
|
|
|
is_sparse=False)
|
|
|
|
|
static_rlt3 = emb(base.to_variable(inp_word))
|
|
|
|
|
|
|
|
|
|
# example 2: load custom or pre-trained word vectors
|
|
|
|
|
weight_data = np.random.random(size=(128, 100)) # word vectors with numpy format
|
|
|
|
|
w_param_attrs = fluid.ParamAttr(
|
|
|
|
|
name="emb_weight",
|
|
|
|
|
learning_rate=0.5,
|
|
|
|
|
initializer=fluid.initializer.NumpyArrayInitializer(weight_data),
|
|
|
|
|
trainable=True)
|
|
|
|
|
with fluid.dygraph.guard():
|
|
|
|
|
emb = fluid.dygraph.Embedding(
|
|
|
|
|
name_scope='embedding',
|
|
|
|
|
size=[128, 100],
|
|
|
|
|
param_attr= w_param_attrs,
|
|
|
|
|
is_sparse=False)
|
|
|
|
|
static_rlt3 = emb(base.to_variable(inp_word))
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
def __init__(self,
|
|
|
|
|
|