@@ -28,7 +28,7 @@ __all__ = [
'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min',
'sequence_first_step', 'sequence_last_step', 'dropout', 'split',
-'l2_normalize', 'matmul', 'warpctc'
+'l2_normalize', 'matmul', 'warpctc', 'sequence_reshape'
]
@@ -213,33 +213,33 @@ def dynamic_lstm(input,
(https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows:

.. math::

    i_t & = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i)

    f_t & = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f)

    \\tilde{c_t} & = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c)

    o_t & = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o)

    c_t & = f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}

    h_t & = o_t \odot act_h(c_t)

where the :math:`W` terms denote weight matrices (e.g. :math:`W_{ix}` is
the matrix of weights from the input to the input gate), :math:`W_{ic}, \
W_{fc}, W_{oc}` are diagonal weight matrices for peephole connections. In
our implementation, we use vectors to represent these diagonal weight
matrices. The :math:`b` terms denote bias vectors (:math:`b_i` is the input
gate bias vector), :math:`\sigma` is the non-linear activation function, such
as the logistic sigmoid, and :math:`i, f, o` and :math:`c` are the input
gate, forget gate, output gate, and cell activation vectors, respectively,
all of which have the same size as the cell output activation vector :math:`h`.

:math:`\odot` is the element-wise product of the vectors. :math:`act_g`
and :math:`act_h` are the cell input and cell output activation functions,
and `tanh` is usually used for them. :math:`\\tilde{c_t}` is also called
the candidate hidden state, which is computed based on the current input and
the previous hidden state.

Set `use_peepholes` to `False` to disable peephole connection. The formula
@@ -251,38 +251,38 @@ def dynamic_lstm(input,
Users can choose to use a fully-connected layer before the LSTM layer.
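
For example, a minimal usage sketch of this pattern (here `input_seq` is
assumed to be an existing sequence Variable, e.g. the output of an embedding
layer, and the layer is assumed to return the hidden and cell sequences):

.. code-block:: python

    hidden_dim = 512
    # project the input to 4 * hidden_dim first, because dynamic_lstm
    # expects an input whose underlying tensor has shape (T x 4D)
    fc_out = fluid.layers.fc(input=input_seq, size=hidden_dim * 4)
    hidden, cell = fluid.layers.dynamic_lstm(input=fc_out,
                                             size=hidden_dim * 4)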

Args:
    input(Variable): The input of dynamic_lstm layer, which supports
        variable length input sequences. The underlying tensor in this
        Variable is a matrix with shape (T x 4D), where T is the total
        number of time steps in this mini-batch and D is the hidden size.
    size(int): 4 * hidden size.
    param_attr(ParamAttr): The parameter attribute for the learnable
        hidden-hidden weights.

        - The shape is (D x 4D), where D is the hidden size.
        - Weights = {:math:`W_{ch}, W_{ih}, W_{fh}, W_{oh}`}
    bias_attr(ParamAttr): The bias attribute for the learnable bias
        weights, which contains two parts: input-hidden bias weights and,
        if `use_peepholes` is set to `True`, peephole connection weights.

        1. `use_peepholes = False`
           - The shape is (1 x 4D).
           - Biases = {:math:`b_c, b_i, b_f, b_o`}.
        2. `use_peepholes = True`
           - The shape is (1 x 7D).
           - Biases = {:math:`b_c, b_i, b_f, b_o, W_{ic}, W_{fc}, W_{oc}`}.
    use_peepholes(bool): Whether to enable diagonal/peephole connections,
        default `True`.
    is_reverse(bool): Whether to compute reversed LSTM, default `False`.
    gate_activation(str): The activation for input gate, forget gate and
        output gate. Choices = ["sigmoid", "tanh", "relu", "identity"],
        default "sigmoid".
    cell_activation(str): The activation for cell output. Choices = ["sigmoid",
        "tanh", "relu", "identity"], default "tanh".
    candidate_activation(str): The activation for candidate hidden state.
        Choices = ["sigmoid", "tanh", "relu", "identity"],
@@ -1914,3 +1914,57 @@ def warpctc(input, label, blank=0, norm_by_times=False, **kwargs):
        attrs={'blank': blank,
               'norm_by_times': norm_by_times})
    return loss_out


def sequence_reshape(input, new_dim):
    """
    **Sequence Reshape Layer**

    This layer reshapes the input sequences to a new dimension set by the
    user. The length of each sequence is recomputed from the original length,
    the original dimension and the new dimension. The following example will
    help to illustrate the function of this layer:

    .. code-block:: text

        x is a LoDTensor:
            x.lod  = [[0, 2, 6]]
            x.data = [[1, 2], [3, 4],
                      [5, 6], [7, 8], [9, 10], [11, 12]]
            x.dims = [6, 2]

        set new_dim = 4

        then out is a LoDTensor:
            out.lod  = [[0, 1, 3]]
            out.data = [[1, 2, 3, 4],
                        [5, 6, 7, 8], [9, 10, 11, 12]]
            out.dims = [3, 4]

    Currently, only 1-level LoDTensor is supported. Please make sure that
    (original length * original dimension) is divisible by the new dimension
    with no remainder for each sequence.

    Args:
        input (Variable): (LoDTensor, default: LoDTensor<float>), a 2-D
            LoDTensor with shape [N, M], where M is the dimension.
        new_dim (int): New dimension which the input LoDTensor is reshaped to.

    Returns:
        Variable: Reshaped LoDTensor according to new dimension.

    Examples:
        .. code-block:: python

            x = fluid.layers.data(name='x', shape=[5, 20],
                                  dtype='float32', lod_level=1)
            x_reshaped = fluid.layers.sequence_reshape(input=x, new_dim=10)
    """
    helper = LayerHelper('sequence_reshape', **locals())
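    # create the output variable with the input's dtype; the sequence_reshape
    # op recomputes each sequence's length (LoD) from the original length,
    # the original dimension and new_dim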
    out = helper.create_tmp_variable(helper.input_dtype())
    helper.append_op(
        type='sequence_reshape',
        inputs={'X': [input]},
        outputs={'Out': [out]},
        attrs={'new_dim': new_dim})
    return out