|
|
|
@ -614,18 +614,17 @@ def simple_lstm(input,
|
|
|
|
|
|
|
|
|
|
@wrap_name_default('lstm_unit')
|
|
|
|
|
def lstmemory_unit(input,
|
|
|
|
|
memory_boot=None,
|
|
|
|
|
out_memory=None,
|
|
|
|
|
name=None,
|
|
|
|
|
size=None,
|
|
|
|
|
param_attr=None,
|
|
|
|
|
act=None,
|
|
|
|
|
gate_act=None,
|
|
|
|
|
state_act=None,
|
|
|
|
|
mixed_bias_attr=None,
|
|
|
|
|
input_proj_bias_attr=None,
|
|
|
|
|
input_proj_layer_attr=None,
|
|
|
|
|
lstm_bias_attr=None,
|
|
|
|
|
mixed_layer_attr=None,
|
|
|
|
|
lstm_layer_attr=None,
|
|
|
|
|
get_output_layer_attr=None):
|
|
|
|
|
lstm_layer_attr=None):
|
|
|
|
|
"""
|
|
|
|
|
Define calculations that an LSTM unit performs during a single time step.
|
|
|
|
|
This function itself is not a recurrent layer, so it can not be
|
|
|
|
@ -662,8 +661,8 @@ def lstmemory_unit(input,
|
|
|
|
|
|
|
|
|
|
:param input: input layer name.
|
|
|
|
|
:type input: LayerOutput
|
|
|
|
|
:param memory_boot: the initialization state of the LSTM cell.
|
|
|
|
|
:type memory_boot: LayerOutput | None
|
|
|
|
|
:param out_memory: output of previous time step
|
|
|
|
|
:type out_memory: LayerOutput | None
|
|
|
|
|
:param name: lstmemory unit name.
|
|
|
|
|
:type name: basestring
|
|
|
|
|
:param size: lstmemory unit size.
|
|
|
|
@ -676,33 +675,35 @@ def lstmemory_unit(input,
|
|
|
|
|
:type gate_act: BaseActivation
|
|
|
|
|
:param state_act: lstm state activation type.
|
|
|
|
|
:type state_act: BaseActivation
|
|
|
|
|
:param mixed_bias_attr: bias parameter attribute of mixed layer.
|
|
|
|
|
False means no bias, None means default bias.
|
|
|
|
|
:type mixed_bias_attr: ParameterAttribute|False
|
|
|
|
|
:param input_proj_bias_attr: bias attribute for input-to-hidden projection.
|
|
|
|
|
False means no bias, None means default bias.
|
|
|
|
|
:type input_proj_bias_attr: ParameterAttribute|False|None
|
|
|
|
|
:param input_proj_layer_attr: extra layer attribute for input to hidden
|
|
|
|
|
projection of the LSTM unit, such as dropout, error clipping.
|
|
|
|
|
:type input_proj_layer_attr: ExtraLayerAttribute
|
|
|
|
|
:param lstm_bias_attr: bias parameter attribute of lstm layer.
|
|
|
|
|
False means no bias, None means default bias.
|
|
|
|
|
False means no bias, None means default bias.
|
|
|
|
|
:type lstm_bias_attr: ParameterAttribute|False
|
|
|
|
|
:param mixed_layer_attr: mixed layer's extra attribute.
|
|
|
|
|
:type mixed_layer_attr: ExtraLayerAttribute
|
|
|
|
|
:param lstm_layer_attr: lstm layer's extra attribute.
|
|
|
|
|
:type lstm_layer_attr: ExtraLayerAttribute
|
|
|
|
|
:param get_output_layer_attr: get output layer's extra attribute.
|
|
|
|
|
:type get_output_layer_attr: ExtraLayerAttribute
|
|
|
|
|
:return: lstmemory unit name.
|
|
|
|
|
:rtype: LayerOutput
|
|
|
|
|
"""
|
|
|
|
|
if size is None:
|
|
|
|
|
assert input.size % 4 == 0
|
|
|
|
|
size = input.size / 4
|
|
|
|
|
out_mem = memory(name=name, size=size)
|
|
|
|
|
state_mem = memory(
|
|
|
|
|
name="%s_state" % name, size=size, boot_layer=memory_boot)
|
|
|
|
|
if out_memory is None:
|
|
|
|
|
out_mem = memory(name=name, size=size)
|
|
|
|
|
else:
|
|
|
|
|
out_mem = out_memory
|
|
|
|
|
|
|
|
|
|
state_mem = memory(name="%s_state" % name, size=size)
|
|
|
|
|
|
|
|
|
|
with mixed_layer(
|
|
|
|
|
name="%s_input_recurrent" % name,
|
|
|
|
|
size=size * 4,
|
|
|
|
|
bias_attr=mixed_bias_attr,
|
|
|
|
|
layer_attr=mixed_layer_attr,
|
|
|
|
|
bias_attr=input_proj_bias_attr,
|
|
|
|
|
layer_attr=input_proj_layer_attr,
|
|
|
|
|
act=IdentityActivation()) as m:
|
|
|
|
|
m += identity_projection(input=input)
|
|
|
|
|
m += full_matrix_projection(input=out_mem, param_attr=param_attr)
|
|
|
|
@ -717,11 +718,7 @@ def lstmemory_unit(input,
|
|
|
|
|
gate_act=gate_act,
|
|
|
|
|
state_act=state_act,
|
|
|
|
|
layer_attr=lstm_layer_attr)
|
|
|
|
|
get_output_layer(
|
|
|
|
|
name='%s_state' % name,
|
|
|
|
|
input=lstm_out,
|
|
|
|
|
arg_name='state',
|
|
|
|
|
layer_attr=get_output_layer_attr)
|
|
|
|
|
get_output_layer(name='%s_state' % name, input=lstm_out, arg_name='state')
|
|
|
|
|
|
|
|
|
|
return lstm_out
|
|
|
|
|
|
|
|
|
@ -730,17 +727,16 @@ def lstmemory_unit(input,
|
|
|
|
|
def lstmemory_group(input,
|
|
|
|
|
size=None,
|
|
|
|
|
name=None,
|
|
|
|
|
memory_boot=None,
|
|
|
|
|
out_memory=None,
|
|
|
|
|
reverse=False,
|
|
|
|
|
param_attr=None,
|
|
|
|
|
act=None,
|
|
|
|
|
gate_act=None,
|
|
|
|
|
state_act=None,
|
|
|
|
|
mixed_bias_attr=None,
|
|
|
|
|
input_proj_bias_attr=None,
|
|
|
|
|
input_proj_layer_attr=None,
|
|
|
|
|
lstm_bias_attr=None,
|
|
|
|
|
mixed_layer_attr=None,
|
|
|
|
|
lstm_layer_attr=None,
|
|
|
|
|
get_output_layer_attr=None):
|
|
|
|
|
lstm_layer_attr=None):
|
|
|
|
|
"""
|
|
|
|
|
lstm_group is a recurrent_group version of Long Short Term Memory. It
|
|
|
|
|
does exactly the same calculation as the lstmemory layer (see lstmemory in
|
|
|
|
@ -774,8 +770,8 @@ def lstmemory_group(input,
|
|
|
|
|
:type size: int
|
|
|
|
|
:param name: name of the lstmemory group.
|
|
|
|
|
:type name: basestring
|
|
|
|
|
:param memory_boot: the initialization state of LSTM cell.
|
|
|
|
|
:type memory_boot: LayerOutput | None
|
|
|
|
|
:param out_memory: output of previous time step
|
|
|
|
|
:type out_memory: LayerOutput | None
|
|
|
|
|
:param reverse: is lstm reversed
|
|
|
|
|
:type reverse: bool
|
|
|
|
|
:param param_attr: Parameter config, None if use default.
|
|
|
|
@ -786,18 +782,17 @@ def lstmemory_group(input,
|
|
|
|
|
:type gate_act: BaseActivation
|
|
|
|
|
:param state_act: lstm state activation type.
|
|
|
|
|
:type state_act: BaseActivation
|
|
|
|
|
:param mixed_bias_attr: bias parameter attribute of mixed layer.
|
|
|
|
|
False means no bias, None means default bias.
|
|
|
|
|
:type mixed_bias_attr: ParameterAttribute|False
|
|
|
|
|
:param lstm_bias_attr: bias parameter attribute of lstm layer.
|
|
|
|
|
False means no bias, None means default bias.
|
|
|
|
|
:type lstm_bias_attr: ParameterAttribute|False
|
|
|
|
|
:param mixed_layer_attr: mixed layer's extra attribute.
|
|
|
|
|
:type mixed_layer_attr: ExtraLayerAttribute
|
|
|
|
|
:param input_proj_bias_attr: bias attribute for input-to-hidden projection.
|
|
|
|
|
False means no bias, None means default bias.
|
|
|
|
|
:type input_proj_bias_attr: ParameterAttribute|False|None
|
|
|
|
|
:param input_proj_layer_attr: extra layer attribute for input to hidden
|
|
|
|
|
projection of the LSTM unit, such as dropout, error clipping.
|
|
|
|
|
:type input_proj_layer_attr: ExtraLayerAttribute
|
|
|
|
|
:param lstm_layer_attr: lstm layer's extra attribute.
|
|
|
|
|
:type lstm_layer_attr: ExtraLayerAttribute
|
|
|
|
|
:param get_output_layer_attr: get output layer's extra attribute.
|
|
|
|
|
:type get_output_layer_attr: ExtraLayerAttribute
|
|
|
|
|
:return: the lstmemory group.
|
|
|
|
|
:rtype: LayerOutput
|
|
|
|
|
"""
|
|
|
|
@ -805,18 +800,17 @@ def lstmemory_group(input,
|
|
|
|
|
def __lstm_step__(ipt):
|
|
|
|
|
return lstmemory_unit(
|
|
|
|
|
input=ipt,
|
|
|
|
|
memory_boot=memory_boot,
|
|
|
|
|
name=name,
|
|
|
|
|
size=size,
|
|
|
|
|
mixed_bias_attr=mixed_bias_attr,
|
|
|
|
|
mixed_layer_attr=mixed_layer_attr,
|
|
|
|
|
param_attr=param_attr,
|
|
|
|
|
lstm_bias_attr=lstm_bias_attr,
|
|
|
|
|
act=act,
|
|
|
|
|
gate_act=gate_act,
|
|
|
|
|
state_act=state_act,
|
|
|
|
|
out_memory=out_memory,
|
|
|
|
|
input_proj_bias_attr=input_proj_bias_attr,
|
|
|
|
|
input_proj_layer_attr=input_proj_layer_attr,
|
|
|
|
|
param_attr=param_attr,
|
|
|
|
|
lstm_layer_attr=lstm_layer_attr,
|
|
|
|
|
get_output_layer_attr=get_output_layer_attr)
|
|
|
|
|
lstm_bias_attr=lstm_bias_attr)
|
|
|
|
|
|
|
|
|
|
return recurrent_group(
|
|
|
|
|
name='%s_recurrent_group' % name,
|
|
|
|
|