parent
9524c7ab5a
commit
cd6d69a95f
@ -0,0 +1,64 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from paddle.trainer_config_helpers import *
|
||||||
|
|
||||||
|
######################## data source ################################
|
||||||
|
# Build the vocabulary: every stripped line of the dictionary file is mapped
# to its zero-based line number.
dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'
dict_file = dict()
# Open through a context manager so the handle is closed as soon as the
# vocabulary has been read instead of lingering until garbage collection.
with open(dict_path, "r") as dict_lines:
    for line_count, line in enumerate(dict_lines):
        dict_file[line.strip()] = line_count
|
||||||
|
|
||||||
|
# Data provider wiring: training samples are listed in train.list and are
# produced by `process` from the `sequenceGen` module (per the module/obj
# arguments). The vocabulary mapping is passed to the provider under the
# "dict_file" key. No test list is configured.
provider_args = {"dict_file": dict_file}
define_py_data_sources2(
    train_list='gserver/tests/Sequence/train.list',
    test_list=None,
    module='sequenceGen',
    obj='process',
    args=provider_args)

# Trainer settings: only the batch size is set here.
settings(batch_size=5)
|
||||||
|
######################## network configure ################################
|
||||||
|
# Vocabulary size is the number of lines in the dictionary file. The file is
# read inside a context manager so the handle is closed right away rather
# than being leaked by a bare open(...).readlines() expression.
with open(dict_path, 'r') as dict_lines:
    dict_dim = len(dict_lines.readlines())

# Layer sizes used by the network definition below.
word_dim = 128
hidden_dim = 256
label_dim = 3

# Command-line/config switch (bool, default False) forwarded to the
# embedding parameter attribute.
sparse_update = get_config_arg("sparse_update", bool, False)
|
||||||
|
|
||||||
|
# Input layer named "word", sized to the vocabulary.
data = data_layer(name="word", size=dict_dim)

# Embedding of the input; sparse parameter updates follow the
# `sparse_update` config argument.
emb = embedding_layer(
    input=data,
    size=word_dim,
    param_attr=ParamAttr(sparse_update=sparse_update))

# Full-matrix projection of the embedding to hidden_dim * 4 units, which is
# what gets fed to lstmemory (presumably one slice per LSTM gate/candidate --
# confirm against the Paddle lstmemory documentation).
with mixed_layer(size=hidden_dim * 4) as lstm_input:
    lstm_input += full_matrix_projection(input=emb)

lstm = lstmemory(
    input=lstm_input,
    act=TanhActivation(),
    gate_act=SigmoidActivation(),
    state_act=TanhActivation())

# Only the output of last_seq over the LSTM feeds the classifier.
lstm_last = last_seq(input=lstm)

# Softmax classifier over label_dim classes, with a bias term.
with mixed_layer(
        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
    output += full_matrix_projection(input=lstm_last)

# The label layer is created after the classifier and before the cost,
# matching the order in which the nested call would have evaluated it.
label = data_layer(name="label", size=1)
cost = classification_cost(input=output, label=label)
outputs(cost)
|
@ -1,154 +0,0 @@
|
|||||||
#edit-mode: -*- python -*-
|
|
||||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# TODO(luotao02): This config is only used for unit tests. It is out of date now and will be updated later.
|
|
||||||
|
|
||||||
# Note: when making change to this file, please make sure
|
|
||||||
# sample_trainer_config_rnn.conf is changed accordingly so that the unit test
|
|
||||||
# for comparing these two nets can pass (test_CompareTwoNets)
|
|
||||||
|
|
||||||
# Global parameter defaults: initial std 0.1, device 0.
default_initial_std(0.1)
default_device(0)

# Size of each data layer, plus the two values that ltr_network forwards as
# decay_rate_l1 (l1) and decay_rate (l2); both are zero here.
word_dim = 999
l1 = l2 = 0

model_type("nn")

# Config switch (bool, default False) forwarded to the embedding projection.
sparse_update = get_config_arg("sparse_update", bool, False)
|
|
||||||
|
|
||||||
# Training data: a "proto_sequence" ProtoData source whose `files` argument
# points at trainer/tests/train_sparse.list.
train_source = ProtoData(
    type="proto_sequence",
    files='trainer/tests/train_sparse.list')
TrainData(train_source)

# Optimisation settings: SGD with a 'poly' learning-rate schedule.
Settings(
    algorithm='sgd',
    batch_size=100,
    learning_rate=0.0001,
    learning_rate_decay_a=4e-08,
    learning_rate_decay_b=0.0,
    learning_rate_schedule='poly')
|
|
||||||
|
|
||||||
|
|
||||||
# Default layer widths picked up by ltr_network's keyword arguments.
wordvec_dim = hidden_dim = 32
layer2_dim = layer3_dim = 16

# One input slot (and one embedding/recurrent tower) is declared per name.
slot_names = [
    "qb",
    "qw",
    "tb",
    "tw",
]
|
|
||||||
|
|
||||||
def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one branch of the network, suffixed with ``network_name``.

    For every slot name an input and a "data" layer are registered, followed
    by an embedding layer ("mixed" with a TableProjection), a "recurrent"
    layer and a "seqlastins" layer. The per-slot "seqlastins" layers then feed
    a stack of "fc" layers: ``layer2_*`` -> ``layer3_*`` -> ``output_*``
    (size 1, no bias).

    Layer names include ``network_name``; parameter names do not, so branches
    built with different ``network_name`` values refer to the same parameter
    names (presumably so that the branches share weights -- confirm against
    the config parser).

    Args:
        network_name: Suffix appended to every layer name of this branch.
        word_dim: Size of each slot's data layer.
        wordvec_dim: Size of each slot's embedding layer.
        layer2_dim: Size of the first fully connected layer.
        layer3_dim: Size of the second fully connected layer.
        hidden_dim: Accepted for interface compatibility; not used in the body.
        slot_names: Iterable of slot name prefixes; one tower is built per name.
        l1: Passed as ``decay_rate_l1`` to the embedding projection.
        l2: Passed as ``decay_rate`` to the layer2/layer3 inputs.
    """
    # All inputs are registered before any layer is declared; the two passes
    # are kept separate to preserve that declaration order.
    for slot_name in slot_names:
        Inputs(slot_name + network_name)

    for slot_name in slot_names:
        Layer(
            name=slot_name + network_name,
            type="data",
            size=word_dim,
            device=-1,
        )
        Layer(
            name=slot_name + "_embedding_" + network_name,
            type="mixed",
            size=wordvec_dim,
            bias=False,
            device=-1,
            inputs=TableProjection(
                slot_name + network_name,
                parameter_name="embedding.w0",
                decay_rate_l1=l1,
                sparse_remote_update=True,
                # Module-level config switch, not a parameter of this function.
                sparse_update=sparse_update,
            ),
        )
        Layer(
            name=slot_name + "_rnn1_" + network_name,
            type="recurrent",
            active_type="tanh",
            bias=Bias(initial_std=0, parameter_name="rnn1.bias"),
            inputs=Input(
                slot_name + "_embedding_" + network_name,
                parameter_name="rnn1.w0"),
        )
        Layer(
            name=slot_name + "_rnnlast_" + network_name,
            type="seqlastins",
            inputs=[
                slot_name + "_rnn1_" + network_name,
            ],
        )

    # layer2 takes one input per slot, each with its own weight parameter.
    Layer(
        name="layer2_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer2_dim,
        bias=Bias(parameter_name="layer2.bias"),
        inputs=[
            Input(
                slot_name + "_rnnlast_" + network_name,
                parameter_name="_layer2_" + slot_name + ".w",
                decay_rate=l2,
                initial_smart=True) for slot_name in slot_names
        ],
    )
    Layer(
        name="layer3_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer3_dim,
        bias=Bias(parameter_name="layer3.bias"),
        inputs=[
            Input(
                "layer2_" + network_name,
                parameter_name="_layer3.w",
                decay_rate=l2,
                initial_smart=True),
        ],
    )
    Layer(
        name="output_" + network_name,
        type="fc",
        size=1,
        bias=False,
        inputs=[
            Input(
                "layer3_" + network_name, parameter_name="_layerO.w"),
        ],
    )
|
|
||||||
|
|
||||||
|
|
||||||
# Build the two branches, "left" first and then "right".
for branch_name in ("left", "right"):
    ltr_network(branch_name)

# Label input: a size-1 data layer.
Inputs("label")
Layer(name="label", type="data", size=1)

# Outputs are declared before the cost layer itself, as in the original order.
Outputs("cost", "qb_rnnlast_left")

# Rank cost over the two branch outputs and the label.
Layer(
    name="cost",
    type="rank-cost",
    inputs=["output_left", "output_right", "label"])
|
|
Loading…
Reference in new issue