@@ -280,6 +280,14 @@ class LayerOutput(object):
"""
assert False , " this method should not be invoked "
def set_input ( self , input ) :
"""
Set the input for a memory layer . Can only be used for memory layer
"""
assert isinstance ( input , LayerOutput )
assert self . layer_type == LayerType . MEMORY
SetMemoryInput ( self . name , input . name )
ERROR_CLIPPING = ' error_clipping_threshold '
DROPOUT = ' drop_rate '
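
A minimal usage sketch of the new set_input() hook inside a recurrent step function; the step function, the 256-wide layers, and the sequence input ``seq_input`` are illustrative assumptions, not taken from this patch:

.. code-block:: python

   def step(ipt):
       mem = memory(size=256)                 # the remembered layer is bound later
       state = fc_layer(input=[ipt, mem], size=256)
       mem.set_input(state)                   # the memory remembers 'state'
       return state

   out = recurrent_group(step=step, input=seq_input)
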
@@ -2570,8 +2578,10 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
         size=sz)


+@wrap_name_default("memory", "memory_name")
 def memory(name,
            size,
+           memory_name=None,
            is_seq=False,
            boot_layer=None,
            boot_bias=None,
@@ -2593,14 +2603,32 @@ def memory(name,
     If boot_layer is not null, the memory is just the boot_layer's output.
     Set :code:`is_seq` to true if the boot_layer is a sequence.

     A layer with the same name inside a recurrent group will set this memory
     at each time step.
-    :param name: memory's name.
+    .. code-block:: python
+
+       mem = memory(size=256, name='state')
+       state = fc_layer(input=mem, size=256, name='state')
+
+    If you do not want to specify the name, you can equivalently use set_input()
+    to specify the layer that needs to be remembered, as follows:
+
+    .. code-block:: python
+
+       mem = memory(size=256)
+       state = fc_layer(input=mem, size=256)
+       mem.set_input(state)
+    :param name: the name of the layer which this memory remembers.
+                 If name is None, the user should call set_input() to specify
+                 the name of the layer which this memory remembers.
     :type name: basestring
     :param size: size of memory.
     :type size: int
+    :param memory_name: the name of the memory.
+                        It is ignored when name is provided.
+    :type memory_name: basestring
     :param is_seq: whether the boot_layer is a sequence.
     :type is_seq: bool
     :param boot_layer: boot layer of memory.
@@ -2622,13 +2650,21 @@ def memory(name,
         boot_bias = ParamAttr.to_bias(boot_bias)

     assert boot_layer is None or isinstance(boot_layer, LayerOutput)
+    if name is not None:
+        memory_name = None

-    agent_name = Memory(name, size, is_seq, boot_layer.name
-                        if boot_layer is not None else None, boot_bias,
-                        boot_bias_active_type.name, boot_with_const_id)
+    memory_name = Memory(
+        name,
+        size,
+        is_sequence=is_seq,
+        boot_layer=boot_layer.name if boot_layer is not None else None,
+        boot_bias=boot_bias,
+        boot_bias_active_type=boot_bias_active_type.name,
+        boot_with_const_id=boot_with_const_id,
+        memory_name=memory_name)

     lout = LayerOutput(
-        name=agent_name,
+        name=memory_name,
         size=size,
         layer_type=LayerType.MEMORY,
         parents=[boot_layer] if boot_layer is not None else None)
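
A minimal sketch of a named, bootstrapped memory; the layer names, sizes, and ``boot_input`` are illustrative assumptions. When ``name`` is given like this, the generated ``memory_name`` is discarded, as in the branch above:

.. code-block:: python

   boot = fc_layer(input=boot_input, size=128, name='boot')
   mem = memory(name='rnn_state', size=128, boot_layer=boot)
   rnn_state = fc_layer(input=mem, size=128, name='rnn_state')
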
@@ -2754,8 +2790,8 @@ def gru_step_layer(input,
     :param name:
     :param gate_act:
     :param bias_attr:
-    :param param_attr: the parameter_attribute for transforming the output_mem
-                       from previous step.
+    :param param_attr: the parameter_attribute for transforming the output_mem
+                       from previous step.
     :param layer_attr:
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -2766,10 +2802,10 @@ def gru_step_layer(input,
     Layer(
         name=name,
         type=LayerType.GRU_STEP_LAYER,
-        # The parameter here is for transforming the output_mem. The input has
-        # already been transformed outside this module so it does not need
-        # parameter associated with it.
-        # The parameter here is instead grouped with input is due to
+        # The parameter here is for transforming the output_mem. The input has
+        # already been transformed outside this module so it does not need
+        # parameter associated with it.
+        # The parameter here is instead grouped with input is due to
         # backward model compatibility.
         inputs=[Input(input.name, **param_attr.attr), output_mem.name],
         bias=ParamAttr.to_bias(bias_attr),
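
A minimal sketch of how output_mem is usually wired, assuming the standard recurrent_group pattern; the names, the size 512, and the pre-projected sequence ``gru_input`` are illustrative assumptions. The parameter discussed in the comment above transforms the output memory, while the input is expected to be projected to 3 * size beforehand:

.. code-block:: python

   def gru_step(ipt):
       # ipt is assumed to be projected to 3 * 512 outside this step
       out_mem = memory(name='gru_out', size=512)
       return gru_step_layer(
           input=ipt, output_mem=out_mem, size=512, name='gru_out')

   gru_out = recurrent_group(step=gru_step, input=gru_input)
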
@@ -3376,7 +3412,7 @@ def __cost_input__(input, label, weight=None):
     ipts = [Input(input.name), Input(label.name)]
     parents = [input, label]
     if weight is not None:
         assert weight.layer_type == LayerType.DATA
         assert weight.size == 1
         ipts.append(Input(weight.name))
         parents.append(weight)
     return ipts, parents
@@ -4740,7 +4776,12 @@ def lambda_cost(input,
 @wrap_name_default()
 @layer_support()
-def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
+def cross_entropy(input,
+                  label,
+                  name=None,
+                  coeff=1.0,
+                  weight=None,
+                  layer_attr=None):
     """
     A loss layer for multi-class cross entropy.
@@ -4755,22 +4796,27 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
     :type input: LayerOutput.
     :param name: The name of this layer. It is optional.
     :type name: None|basestring.
-    :param coeff: The coefficient affects the gradient in the backward.
+    :param coeff: The cost is multiplied with coeff.
+                  The coefficient affects the gradient in the backward pass.
     :type coeff: float.
+    :param weight: The cost of each sample is multiplied with its weight.
+                   The weight should be a layer with size=1. Note that gradient
+                   will not be calculated for weight.
+    :type weight: LayerOutput
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput.
     """
+    ipts, parents = __cost_input__(input, label, weight)
+
     Layer(
         name=name,
         type=LayerType.CROSS_ENTROPY,
-        inputs=[input.name, label.name],
+        inputs=ipts,
         coeff=coeff,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
+    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)
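
A minimal sketch of the weighted cost; the layer names, the 10-class softmax, and ``hidden`` are illustrative assumptions. The weight must come from a data layer of size 1, and no gradient is computed for it:

.. code-block:: python

   prob = fc_layer(input=hidden, size=10, act=SoftmaxActivation())
   lbl = data_layer(name='label', size=10)
   w = data_layer(name='weight', size=1)
   cost = cross_entropy(input=prob, label=lbl, weight=w)
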
 @wrap_name_default()