parent d425a5ca10
commit 9ccc94f4a4
@@ -0,0 +1,112 @@
import math

import numpy
import paddle.v2 as paddle
from paddle.trainer_config_helpers.attrs import ParamAttr

from model_v2 import db_lstm

UNK_IDX = 0  # assumed: the demo dictionaries reserve id 0 for the unknown token

word_dict_file = './data/wordDict.txt'
label_dict_file = './data/targetDict.txt'
predicate_file = './data/verbDict.txt'

word_dict = dict()
label_dict = dict()
predicate_dict = dict()

# Build the three vocabularies: one token per line, line number = token id.
with open(word_dict_file, 'r') as f_word, \
        open(label_dict_file, 'r') as f_label, \
        open(predicate_file, 'r') as f_pre:
    for i, line in enumerate(f_word):
        w = line.strip()
        word_dict[w] = i

    for i, line in enumerate(f_label):
        w = line.strip()
        label_dict[w] = i

    for i, line in enumerate(f_pre):
        w = line.strip()
        predicate_dict[w] = i

word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(predicate_dict)
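
# Each dictionary file holds one token per line, so a token's id is its
# line number. With a hypothetical wordDict.txt containing
#     <unk>
#     set
#     the
# the loop above yields word_dict == {'<unk>': 0, 'set': 1, 'the': 2},
# and UNK_IDX == 0 then maps out-of-vocabulary words to the <unk> entry.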


def train_reader(file_name="data/feature"):
    def reader():
        with open(file_name, 'r') as fdata:
            for line in fdata:
                # each line carries nine tab-separated fields
                sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, \
                    mark, label = line.strip().split('\t')

                words = sentence.split()
                sen_len = len(words)
                word_slot = [word_dict.get(w, UNK_IDX) for w in words]

                # the predicate and its context words are repeated across
                # every position, so all slots share the sentence length
                predicate_slot = [predicate_dict.get(predicate)] * sen_len
                ctx_n2_slot = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len
                ctx_n1_slot = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len
                ctx_0_slot = [word_dict.get(ctx_0, UNK_IDX)] * sen_len
                ctx_p1_slot = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len
                ctx_p2_slot = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len

                marks = mark.split()
                mark_slot = [int(w) for w in marks]

                label_list = label.split()
                label_slot = [label_dict.get(w) for w in label_list]
                yield word_slot, ctx_n2_slot, ctx_n1_slot, \
                    ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, \
                    mark_slot, label_slot

    return reader
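
# train_reader follows the v2 reader-creator convention: it returns a
# function that, when called, yields one sample at a time. A quick smoke
# test (assuming data/feature exists in the format above) would be:
#
#     reader = train_reader()   # the creator returns the inner reader
#     first = next(reader())    # one sample: nine parallel slots
#     assert len(first) == 9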


def main():
    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology
    output = db_lstm(word_dict_len, label_dict_len, pred_len)
    target = paddle.layer.data(name='target', size=label_dict_len)

    # same initialization constants as in model_v2
    mix_hidden_lr = 1e-3
    default_std = 1 / math.sqrt(512) / 3.0  # 512 = hidden_dim in model_v2

    crf_cost = paddle.layer.crf(
        size=label_dict_len,
        input=output,
        label=target,
        param_attr=paddle.attr.Param(
            name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))

    # the decoding layer shares the 'crfw' transition weights with crf_cost
    crf_dec = paddle.layer.crf_decoding(
        name='crf_dec_l',
        size=label_dict_len,
        input=output,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))

    topo = [crf_cost, crf_dec]
    parameters = paddle.parameters.create(topo)
    optimizer = paddle.optimizer.Momentum(momentum=0.01, learning_rate=2e-2)

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            para = parameters.get('___fc_2__.w0')
            print "Pass %d, Batch %d, Cost %f, Weight mean %f" % (
                event.pass_id, event.batch_id, event.cost, para.mean())
        else:
            pass

    trainer = paddle.trainer.SGD(update_equation=optimizer)

    trainer.train(
        train_data_reader=train_reader,
        batch_size=32,
        topology=topo,
        parameters=parameters,
        event_handler=event_handler,
        num_passes=10000,
        data_types=[],
        reader_dict={})


if __name__ == '__main__':
    main()
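
Both data_types and reader_dict are left as empty placeholders in this commit. If reader_dict is meant to pair each data layer declared in model_v2 with the positional slot that the reader yields (the convention the v2 feeding mechanism uses), a plausible filling would look like the sketch below; the mapping is an assumption, not part of this change:

    # hypothetical: layer names from model_v2 paired with the yield order
    # of train_reader
    reader_dict = {
        'word_data': 0, 'ctx_n2_data': 1, 'ctx_n1_data': 2, 'ctx_0_data': 3,
        'ctx_p1_data': 4, 'ctx_p2_data': 5, 'verb_data': 6, 'mark_data': 7,
        'target': 8,
    }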

@@ -0,0 +1,103 @@
import math

import paddle.v2 as paddle


def db_lstm(word_dict_len, label_dict_len, pred_len):
    mark_dict_len = 2
    word_dim = 32
    mark_dim = 5
    hidden_dim = 512
    depth = 8

    # 8 features
    word = paddle.layer.data(name='word_data', size=word_dict_len)
    predicate = paddle.layer.data(name='verb_data', size=pred_len)

    ctx_n2 = paddle.layer.data(name='ctx_n2_data', size=word_dict_len)
    ctx_n1 = paddle.layer.data(name='ctx_n1_data', size=word_dict_len)
    ctx_0 = paddle.layer.data(name='ctx_0_data', size=word_dict_len)
    ctx_p1 = paddle.layer.data(name='ctx_p1_data', size=word_dict_len)
    ctx_p2 = paddle.layer.data(name='ctx_p2_data', size=word_dict_len)
    mark = paddle.layer.data(name='mark_data', size=mark_dict_len)

    default_std = 1 / math.sqrt(hidden_dim) / 3.0

    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)

    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(
            name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        name='word_ctx-in_embedding',
        size=mark_dim,
        input=mark,
        param_attr=std_0)

    # the six word/context features share one frozen embedding table ('emb')
    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        paddle.layer.embedding(
            size=word_dim, input=x, param_attr=emb_para) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0 = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=std_default) for emb in emb_layers
        ])

    mix_hidden_lr = 1e-3
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)

    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=std_0,
        param_attr=lstm_para_attr)

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=std_default,
            input=[
                paddle.layer.full_matrix_projection(
                    input=input_tmp[0], param_attr=hidden_para_attr),
                paddle.layer.full_matrix_projection(
                    input=input_tmp[1], param_attr=lstm_para_attr)
            ])

        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            reverse=((i % 2) == 1),
            bias_attr=std_0,
            param_attr=lstm_para_attr)

        input_tmp = [mix_hidden, lstm]

    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=input_tmp[0], param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=input_tmp[1], param_attr=lstm_para_attr)
        ])

    return feature_out
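
The reverse=((i % 2) == 1) flag is what makes the stack bidirectional: each new LSTM in the loop runs in the direction opposite to the one below it (lstm_0 before the loop is forward), while the mixed layers carry the direct edges. A quick check of the schedule for depth = 8:

    # direction of the stacked LSTMs built by the loop (i = 1..7)
    schedule = [(i % 2) == 1 for i in range(1, 8)]
    # -> [True, False, True, False, True, False, True]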