You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
138 lines
5.2 KiB
138 lines
5.2 KiB
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from __future__ import print_function
|
|
from .framework import Variable, in_dygraph_mode
|
|
from .layer_helper import LayerHelper
|
|
|
|
__all__ = ['one_hot', 'embedding']
|
|
|
|
|
|
def one_hot(input, depth, allow_out_of_range=False):
|
|
"""
|
|
This layer creates the one-hot representations for input indices.
|
|
|
|
Args:
|
|
input(Variable): Input indices represent locations, which takes value 1.0
|
|
in indices, while all other locations take value 0.
|
|
depth(scalar): An interger defining the depth of the one-hot dimension.
|
|
allow_out_of_range(bool): A bool value indicating whether the input
|
|
indices could be out of range [0, depth). When input indices are
|
|
out of range, exceptions is raised if allow_out_of_range is False,
|
|
or zero-filling representations is created if it is set True
|
|
|
|
Returns:
|
|
Variable: The one-hot representations of input.
|
|
|
|
Examples:
|
|
.. code-block:: python
|
|
|
|
import paddle.fluid as fluid
|
|
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
|
|
one_hot_label = fluid.one_hot(input=label, depth=10)
|
|
"""
|
|
helper = LayerHelper("one_hot_v2", **locals())
|
|
|
|
one_hot_out = helper.create_variable_for_type_inference(dtype='float32')
|
|
|
|
if in_dygraph_mode():
|
|
inputs = {'X': input}
|
|
attrs = {'depth': depth}
|
|
else:
|
|
if not isinstance(depth, Variable):
|
|
# user attribute
|
|
inputs = {'X': input}
|
|
attrs = {'depth': depth}
|
|
else:
|
|
depth.stop_gradient = True
|
|
inputs = {'X': input, 'depth_tensor': depth}
|
|
attrs = {}
|
|
helper.append_op(
|
|
type="one_hot_v2",
|
|
inputs=inputs,
|
|
attrs=attrs,
|
|
outputs={'Out': one_hot_out},
|
|
stop_gradient=True)
|
|
return one_hot_out
|
|
|
|
|
|
def embedding(input,
|
|
size,
|
|
is_sparse=False,
|
|
is_distributed=False,
|
|
padding_idx=None,
|
|
param_attr=None,
|
|
dtype='float32'):
|
|
"""
|
|
**Embedding Layer**
|
|
|
|
This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
|
|
a lookup table. The result of this lookup is the embedding of each ID in the
|
|
:attr:`input`.
|
|
|
|
All the input variables are passed in as local variables to the LayerHelper
|
|
constructor.
|
|
|
|
Args:
|
|
input(Variable): Input is a Tensor<int64> Variable, which contains the IDs information.
|
|
The value of the input IDs should satisfy :math:`0<= id < size[0]`.
|
|
size(tuple|list): The shape of the look up table parameter. It should
|
|
have two elements which indicate the size of the dictionary of
|
|
embeddings and the size of each embedding vector respectively.
|
|
is_sparse(bool): The flag indicating whether to use sparse update.
|
|
is_distributed(bool): Whether to run lookup table from remote parameter server.
|
|
padding_idx(int|long|None): It will output all-zero padding data whenever
|
|
lookup encounters :math:`padding\_idx` in Ids. If set :attr:`None`, it makes
|
|
no effect to output. If :math:`padding\_idx < 0`, the :math:`padding\_idx`
|
|
will automatically be converted to :math:`size[0] + padding\_idx` to use.
|
|
Default: None.
|
|
param_attr(ParamAttr): Parameters for this layer.
|
|
dtype(np.dtype|core.VarDesc.VarType|str): The dtype refers to the data type of output
|
|
tensor. It can be float32, float_16, int etc.
|
|
|
|
Returns:
|
|
Variable: The tensor variable storing the embeddings of the \
|
|
supplied inputs.
|
|
|
|
Examples:
|
|
.. code-block:: python
|
|
|
|
import paddle.fluid as fluid
|
|
# [batch_size, 20] -> [batch_size, 20, 64]
|
|
data = fluid.layers.data(name='sequence', shape=[20], dtype='int64')
|
|
emb = fluid.embedding(input=data, size=[128, 64])
|
|
"""
|
|
|
|
helper = LayerHelper('embedding', **locals())
|
|
remote_prefetch = is_sparse and (not is_distributed)
|
|
if remote_prefetch:
|
|
assert is_sparse is True and is_distributed is False
|
|
w = helper.create_parameter(
|
|
attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False)
|
|
tmp = helper.create_variable_for_type_inference(dtype)
|
|
padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
|
|
size[0] + padding_idx)
|
|
helper.append_op(
|
|
type='lookup_table_v2',
|
|
inputs={'Ids': input,
|
|
'W': w},
|
|
outputs={'Out': tmp},
|
|
attrs={
|
|
'is_sparse': is_sparse,
|
|
'is_distributed': is_distributed,
|
|
'remote_prefetch': remote_prefetch,
|
|
'padding_idx': padding_idx
|
|
})
|
|
return tmp
|