parent
3c5cc6444c
commit
ac69f7730a
@ -0,0 +1,106 @@
|
||||
type: "nn"
|
||||
layers {
|
||||
name: "input"
|
||||
type: "data"
|
||||
size: 256
|
||||
active_type: ""
|
||||
}
|
||||
layers {
|
||||
name: "__gated_unit_layer_0___input_proj"
|
||||
type: "fc"
|
||||
size: 512
|
||||
active_type: "tanh"
|
||||
inputs {
|
||||
input_layer_name: "input"
|
||||
input_parameter_name: "___gated_unit_layer_0___input_proj.w0"
|
||||
}
|
||||
bias_parameter_name: "___gated_unit_layer_0___input_proj.wbias"
|
||||
error_clipping_threshold: 100.0
|
||||
}
|
||||
layers {
|
||||
name: "__gated_unit_layer_0___gate"
|
||||
type: "fc"
|
||||
size: 512
|
||||
active_type: "sigmoid"
|
||||
inputs {
|
||||
input_layer_name: "input"
|
||||
input_parameter_name: "___gated_unit_layer_0___gate.w0"
|
||||
}
|
||||
bias_parameter_name: "___gated_unit_layer_0___gate.wbias"
|
||||
error_clipping_threshold: 100.0
|
||||
}
|
||||
layers {
|
||||
name: "__gated_unit_layer_0___gated_act"
|
||||
type: "mixed"
|
||||
size: 512
|
||||
active_type: ""
|
||||
inputs {
|
||||
input_layer_name: "__gated_unit_layer_0___input_proj"
|
||||
}
|
||||
inputs {
|
||||
input_layer_name: "__gated_unit_layer_0___gate"
|
||||
}
|
||||
error_clipping_threshold: 100.0
|
||||
operator_confs {
|
||||
type: "dot_mul"
|
||||
input_indices: 0
|
||||
input_indices: 1
|
||||
input_sizes: 512
|
||||
input_sizes: 512
|
||||
output_size: 512
|
||||
dotmul_scale: 1
|
||||
}
|
||||
}
|
||||
parameters {
|
||||
name: "___gated_unit_layer_0___input_proj.w0"
|
||||
size: 131072
|
||||
initial_mean: 0.0
|
||||
initial_std: 0.0001
|
||||
dims: 256
|
||||
dims: 512
|
||||
initial_strategy: 0
|
||||
initial_smart: false
|
||||
}
|
||||
parameters {
|
||||
name: "___gated_unit_layer_0___input_proj.wbias"
|
||||
size: 512
|
||||
initial_mean: 0.0
|
||||
initial_std: 1
|
||||
dims: 1
|
||||
dims: 512
|
||||
initial_strategy: 0
|
||||
initial_smart: false
|
||||
}
|
||||
parameters {
|
||||
name: "___gated_unit_layer_0___gate.w0"
|
||||
size: 131072
|
||||
initial_mean: 0.0
|
||||
initial_std: 0.0001
|
||||
dims: 256
|
||||
dims: 512
|
||||
initial_strategy: 0
|
||||
initial_smart: false
|
||||
}
|
||||
parameters {
|
||||
name: "___gated_unit_layer_0___gate.wbias"
|
||||
size: 512
|
||||
initial_mean: 0.0
|
||||
initial_std: 1
|
||||
dims: 1
|
||||
dims: 512
|
||||
initial_strategy: 0
|
||||
initial_smart: false
|
||||
}
|
||||
input_layer_names: "input"
|
||||
output_layer_names: "__gated_unit_layer_0___gated_act"
|
||||
sub_models {
|
||||
name: "root"
|
||||
layer_names: "input"
|
||||
layer_names: "__gated_unit_layer_0___input_proj"
|
||||
layer_names: "__gated_unit_layer_0___gate"
|
||||
layer_names: "__gated_unit_layer_0___gated_act"
|
||||
input_layer_names: "input"
|
||||
output_layer_names: "__gated_unit_layer_0___gated_act"
|
||||
is_recurrent_layer_group: false
|
||||
}
|
||||
|
@ -0,0 +1,16 @@
|
||||
from paddle.trainer_config_helpers import *
|
||||
|
||||
# Network input: a data layer named 'input' with size 256.
data = data_layer(name='input', size=256)

# Activation passed to the gated unit via `act`.
tanh_act = TanhActivation()

# Gate branch settings. Every attribute below is its own freshly constructed
# object; none of them is reused for another keyword.
gate_weight_init = ParamAttr(initial_std=1e-4)
gate_extra = ExtraLayerAttribute(error_clipping_threshold=100.0)
gate_bias_init = ParamAttr(initial_std=1)

# Input-projection branch settings, using the same values as the gate branch
# but built as separate objects.
inproj_weight_init = ParamAttr(initial_std=1e-4)
inproj_extra = ExtraLayerAttribute(error_clipping_threshold=100.0)
inproj_bias_init = ParamAttr(initial_std=1)

# Extra attribute passed as `layer_attr` for the gated unit itself.
output_extra = ExtraLayerAttribute(error_clipping_threshold=100.0)

# Build the gated unit of size 512 on top of the input layer.
glu = gated_unit_layer(
    input=data,
    size=512,
    act=tanh_act,
    gate_param_attr=gate_weight_init,
    gate_attr=gate_extra,
    gate_bias_attr=gate_bias_init,
    inproj_param_attr=inproj_weight_init,
    inproj_layer_attr=inproj_extra,
    inproj_bias_attr=inproj_bias_init,
    layer_attr=output_extra)

# Declare the gated unit as the network output.
outputs(glu)
|
Loading…
Reference in new issue