@@ -84,6 +84,7 @@ __all__ = [
     'GeneratedInput',
     'SubsequenceInput',
     'gru_step_layer',
+    'gru_step_naive_layer',
     'recurrent_layer',
     'BaseGeneratedInput',
     'conv_operator',
@@ -2284,7 +2285,7 @@ def img_pool_layer(input,
     type_name = pool_type.name + '-projection' \
         if (
-        isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
+            isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
         else pool_type.name
 
     pool_size_y = pool_size if pool_size_y is None else pool_size_y
 
@@ -3084,6 +3085,78 @@ def gru_step_layer(input,
         activation=act)
 
 
+@wrap_bias_attr_default()
+@wrap_param_attr_default()
+@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
+@wrap_act_default(act=TanhActivation())
+@wrap_name_default('gru_step')
+@layer_support(ERROR_CLIPPING, DROPOUT)
+def gru_step_naive_layer(input,
+                         output_mem,
+                         size=None,
+                         name=None,
+                         act=None,
+                         gate_act=None,
+                         bias_attr=None,
+                         param_attr=None,
+                         layer_attr=None):
"""
|
|
|
|
|
GRU Step Layer, but using MixedLayer to generate. It support ERROR_CLIPPING
|
|
|
|
|
and DROPOUT.
|
|
|
|
|
|
|
|
|
|
:param input:
|
|
|
|
|
:param output_mem:
|
|
|
|
|
:param size:
|
|
|
|
|
:param name:
|
|
|
|
|
:param act:
|
|
|
|
|
:param gate_act:
|
|
|
|
|
:param bias_attr:
|
|
|
|
|
:param param_attr:
|
|
|
|
|
:param layer_attr:
|
|
|
|
|
:return:
|
|
|
|
|
"""
+    if input.size % 3 != 0:
+        raise ValueError("GruStep input size must be divisible by 3")
+    if size is None:
+        size = input.size // 3
+
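+    # Each gate computes gate_act(x_slice + W_g * h_{t-1} + b_g). The input
+    # projections of all three parts are precomputed into `input` (size
+    # 3 * size), so a gate only slices its part out via identity_projection
+    # and adds a learned projection of output_mem.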
+    def __gate__(gate_name, offset):
+        with mixed_layer(
+                name=name + "_" + gate_name,
+                size=size,
+                layer_attr=layer_attr,
+                bias_attr=bias_attr,
+                act=gate_act) as gate:
+            gate += identity_projection(input=input, offset=offset)
+            gate += full_matrix_projection(
+                input=output_mem, param_attr=param_attr)
+        return gate
+
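+    # Update gate u_t reads slice [0, size); reset gate r_t reads
+    # slice [size, 2 * size) of the precomputed input.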
+    update_gate = __gate__("update", 0)
+    reset_gate = __gate__("reset", size)
+
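+    # Elementwise product r_t * h_{t-1}: the reset gate decides how much of
+    # the previous state feeds the candidate below.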
+    with mixed_layer(
+            name=name + "_reset_output", bias_attr=False) as reset_output:
+        reset_output += dotmul_operator(a=output_mem, b=reset_gate)
+
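+    # Output candidate c_t = act(x_slice + W_c * (r_t * h_{t-1}) + b_c),
+    # using slice [2 * size, 3 * size) of the precomputed input.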
+    with mixed_layer(
+            name=name + "_output_candidate",
+            size=size,
+            layer_attr=layer_attr,
+            bias_attr=bias_attr,
+            act=act) as output_candidate:
+        output_candidate += identity_projection(input=input, offset=2 * size)
+        output_candidate += full_matrix_projection(
+            input=reset_output, param_attr=param_attr)
+
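+    # Final interpolation h_t = (1 - u_t) * h_{t-1} + u_t * c_t, expanded as
+    # h_{t-1} - u_t * h_{t-1} + u_t * c_t so it maps onto three projections.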
+    with mixed_layer(name=name) as output:
+        output += identity_projection(output_mem)
+        output += dotmul_operator(a=output_mem, b=update_gate, scale=-1.0)
+        output += dotmul_operator(a=output_candidate, b=update_gate)
+
+    return output
+
+
 @wrap_name_default()
 @layer_support()
 def get_output_layer(input, arg_name, name=None, layer_attr=None):
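
A minimal usage sketch for the new layer (not part of the diff): it assumes the usual v1-API pattern where the step input is pre-projected to 3 * hidden_dim outside the step and the previous output is read back through memory(); names such as emb and hidden_dim are placeholders.

    # Sketch only: wiring gru_step_naive_layer inside a recurrent_group.
    hidden_dim = 128

    def gru_step(y):
        # The previous step's output, read back by the layer name.
        mem = memory(name="gru_naive", size=hidden_dim)
        return gru_step_naive_layer(
            name="gru_naive", input=y, output_mem=mem, size=hidden_dim)

    # `emb` is assumed to be a sequence layer; project it to 3 * hidden_dim
    # so each step can slice out its update/reset/candidate parts.
    gru_input = mixed_layer(
        size=hidden_dim * 3, input=[full_matrix_projection(input=emb)])
    gru_out = recurrent_group(name="gru_group", step=gru_step, input=gru_input)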