make gru_group parameters sharable

wangyang59 8 years ago
parent ecbff689fb
commit 6da7283475

@@ -68,8 +68,8 @@ bool GruStepLayer::init(const LayerMap& layerMap,
if (!Layer::init(layerMap, parameterMap)) return false;
CHECK_EQ(2U, inputLayers_.size());
- CHECK_EQ(getSize() * getSize() * 3, parameters_[0]->getSize());
- weight_.reset(new Weight(getSize(), getSize() * 3, parameters_[0]));
+ CHECK_EQ(getSize() * getSize() * 3, parameters_[1]->getSize());
+ weight_.reset(new Weight(getSize(), getSize() * 3, parameters_[1]));
if (biasParameter_.get() != NULL) {
CHECK_EQ(getSize() * 3, biasParameter_->getSize());

@@ -2996,7 +2996,7 @@ class GruStepLayer(LayerBase):
config_assert(input_layer1.size == size,
'input_layer1.size != layer.size')
self.config.active_gate_type = active_gate_type
- self.create_input_parameter(0, size * size * 3, [size, size * 3])
+ self.create_input_parameter(1, size * size * 3, [size, size * 3])
self.create_bias_parameter(bias, size * 3)

@@ -19,6 +19,9 @@
# to use these units, import this module in your config_file:
# import trainer.recurrent_units
#
+ # The modules in this file are DEPRECATED.
+ # If you would like to use lstm/gru
+ # please use the functions defined in paddle.trainer_config_helpers.
from paddle.trainer.config_parser import *

@@ -2682,6 +2682,7 @@ def lstm_step_layer(input,
@wrap_bias_attr_default()
+ @wrap_param_attr_default()
@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
@wrap_act_default(act=TanhActivation())
@wrap_name_default('gru_step')
@@ -2693,6 +2694,7 @@ def gru_step_layer(input,
name=None,
gate_act=None,
bias_attr=None,
+ param_attr=None,
layer_attr=None):
"""
@@ -2714,7 +2716,7 @@ def gru_step_layer(input,
Layer(
name=name,
type=LayerType.GRU_STEP_LAYER,
- inputs=[input.name, output_mem.name],
+ inputs=[input.name, Input(output_mem.name, **param_attr.attr)],
bias=ParamAttr.to_bias(bias_attr),
size=size,
active_type=act.name,

@@ -822,6 +822,7 @@ def gru_unit(input,
size=None,
name=None,
gru_bias_attr=None,
+ gru_param_attr=None,
act=None,
gate_act=None,
gru_layer_attr=None):
@@ -862,6 +863,7 @@ def gru_unit(input,
output_mem=out_mem,
size=size,
bias_attr=gru_bias_attr,
+ param_attr=gru_param_attr,
act=act,
gate_act=gate_act,
layer_attr=gru_layer_attr)
@@ -874,6 +876,7 @@ def gru_group(input,
name=None,
reverse=False,
gru_bias_attr=None,
+ gru_param_attr=None,
act=None,
gate_act=None,
gru_layer_attr=None):
@@ -922,6 +925,7 @@ def gru_group(input,
name=name,
size=size,
gru_bias_attr=gru_bias_attr,
+ gru_param_attr=gru_param_attr,
act=act,
gate_act=gate_act,
gru_layer_attr=gru_layer_attr)
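
At the gru_group level the same idea reads more directly: handing one named ParamAttr to several groups via the new gru_param_attr should make them train a single recurrent weight matrix. A minimal sketch along the lines of the shared_gru test config added below (names and sizes are illustrative; the inputs are assumed to already be size * 3 wide, since gru_group feeds them straight into gru_step_layer):

from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

# Two input sequences, each already projected to size * 3 = 600.
seq_a = data_layer(name='seq_a', size=600)
seq_b = data_layer(name='seq_b', size=600)

# Illustrative shared attributes; the shared names are what tie the parameters together.
shared_w = ParamAttr(name='shared_gru_w')
shared_b = ParamAttr(name='shared_gru_b', initial_mean=0., initial_std=0.)

gru_a = gru_group(name='gru_a', input=seq_a, size=200,
                  gru_param_attr=shared_w, gru_bias_attr=shared_b)
gru_b = gru_group(name='gru_b', input=seq_b, size=200,
                  gru_param_attr=shared_w, gru_bias_attr=shared_b)

predict = fc_layer(input=[last_seq(input=gru_a), last_seq(input=gru_b)],
                   size=10, bias_attr=False, act=SoftmaxActivation())
outputs(classification_cost(input=predict,
                            label=data_layer(name='label', size=10)))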
@@ -942,6 +946,7 @@ def simple_gru(input,
mixed_bias_param_attr=None,
mixed_layer_attr=None,
gru_bias_attr=None,
+ gru_param_attr=None,
act=None,
gate_act=None,
gru_layer_attr=None):
@@ -1010,6 +1015,7 @@ def simple_gru(input,
input=m,
reverse=reverse,
gru_bias_attr=gru_bias_attr,
+ gru_param_attr=gru_param_attr,
act=act,
gate_act=gate_act,
gru_layer_attr=gru_layer_attr)

@@ -3,7 +3,7 @@ export configs=(test_fc layer_activations projections test_print_layer
test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
- test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight
+ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops)
export whole_configs=(test_split_datasource)

@@ -307,10 +307,10 @@ layers {
active_type: "tanh"
inputs {
input_layer_name: "__mixed_1__@__gru_group_0___recurrent_group"
input_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.w0"
}
inputs {
input_layer_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
input_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.w1"
}
bias_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias"
active_gate_type: "sigmoid"
@@ -462,14 +462,14 @@ parameters {
initial_smart: false
}
parameters {
name: "___gru_group_0__@__gru_group_0___recurrent_group.w0"
name: "___gru_group_0__@__gru_group_0___recurrent_group.w1"
size: 30000
initial_mean: 0.0
- initial_std: 0.01
+ initial_std: 0.1
dims: 100
dims: 300
initial_strategy: 0
- initial_smart: false
+ initial_smart: true
}
parameters {
name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias"

@@ -0,0 +1,40 @@
from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

data_1 = data_layer(name='data_a', size=100)
data_2 = data_layer(name='data_b', size=100)

mixed_param = ParamAttr(name='mixed_param')

gru_param = ParamAttr(name='gru_param')
gru_bias = ParamAttr(name='gru_bias', initial_mean=0., initial_std=0.)

gru1 = simple_gru(
    input=data_1,
    size=200,
    mixed_param_attr=mixed_param,
    mixed_bias_param_attr=False,
    gru_bias_attr=gru_bias,
    gru_param_attr=gru_param)

gru2 = simple_gru(
    input=data_2,
    size=200,
    mixed_param_attr=mixed_param,
    mixed_bias_param_attr=False,
    gru_bias_attr=gru_bias,
    gru_param_attr=gru_param)

softmax_param = ParamAttr(name='softmax_param')

predict = fc_layer(
    input=[last_seq(input=gru1), last_seq(input=gru2)],
    size=10,
    param_attr=[softmax_param, softmax_param],
    bias_attr=False,
    act=SoftmaxActivation())
outputs(
    classification_cost(
        input=predict, label=data_layer(
            name='label', size=10)))