|
|
|
from ..param_attr import ParamAttr
|
|
|
|
|
from tensor import concat
|
|
|
|
|
|
|
|
|
|
__all__ = [
|
|
|
|
|
'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf',
|
|
|
|
|
'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy',
|
|
|
|
|
'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
|
|
|
|
|
'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
|
|
|
|
|
'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min',
|
|
|
|
|
'sequence_first_step', 'sequence_last_step', 'dropout'
|
|
|
|
|
'fc',
|
|
|
|
|
'embedding',
|
|
|
|
|
'dynamic_lstm',
|
|
|
|
|
'gru_unit',
|
|
|
|
|
'linear_chain_crf',
|
|
|
|
|
'crf_decoding',
|
|
|
|
|
'cos_sim',
|
|
|
|
|
'cross_entropy',
|
|
|
|
|
'square_error_cost',
|
|
|
|
|
'accuracy',
|
|
|
|
|
'chunk_eval',
|
|
|
|
|
'sequence_conv',
|
|
|
|
|
'conv2d',
|
|
|
|
|
'sequence_pool',
|
|
|
|
|
'pool2d',
|
|
|
|
|
'batch_norm',
|
|
|
|
|
'beam_search_decode',
|
|
|
|
|
'conv2d_transpose',
|
|
|
|
|
'sequence_expand',
|
|
|
|
|
'lstm_unit',
|
|
|
|
|
'reduce_sum',
|
|
|
|
|
'reduce_mean',
|
|
|
|
|
'reduce_max',
|
|
|
|
|
'reduce_min',
|
|
|
|
|
'sequence_first_step',
|
|
|
|
|
'sequence_last_step',
|
|
|
|
|
'dropout',
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def gru_unit(input,
|
|
|
|
|
h_t & = dot((1-u_t), m_t) + dot(u_t, h_{t-1})
|
|
|
|
|
|
|
|
|
|
The inputs of gru unit includes :math:`z_t`, :math:`h_{t-1}`. In terms
of the equation above, the :math:`z_t` is split into 3 parts -
:math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to
implement a full GRU unit operator for an input, a fully
connected layer has to be applied, such that :math:`z_t = W_{fc}x_t`.
|
|
|
|
|
|
|
|
|
|
The terms :math:`u_t` and :math:`r_t` represent the update and reset gates
of the GRU cell. Unlike LSTM, GRU has one lesser gate. However, there is
an intermediate candidate hidden output, which is denoted by :math:`m_t`.
|
|
|
|
|
This layer has three outputs :math:`h_t`, :math:`dot(r_t, h_{t-1})`
|
|
|
|
|
and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`.
|
|
|
|
def gru_unit(input,
|
|
|
|
|
.. code-block:: python
|
|
|
|
|
|
|
|
|
|
# assuming we have x_t_data and prev_hidden of size=10
|
|
|
|
|
x_t = fluid.layers.fc(input=x_t_data, size=30)
|
|
|
|
|
hidden_val, r_h_val, gate_val = fluid.layers.gru_unit(input=x_t,
|
|
|
|
|
hidden = prev_hidden)
|
|
|
|
|
|
|
|
|
|