@@ -37,7 +37,7 @@ depth = 8
 mix_hidden_lr = 1e-3
 
 IS_SPARSE = True
-PASS_NUM = 10
+PASS_NUM = 100
 BATCH_SIZE = 10
 
 embedding_name = 'emb'
@@ -77,7 +77,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
     emb_layers.append(mark_embedding)
 
     hidden_0_layers = [
-        fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
+        fluid.layers.fc(input=emb, size=hidden_dim, act='tanh')
+        for emb in emb_layers
     ]
 
     hidden_0 = fluid.layers.sums(input=hidden_0_layers)
@@ -94,8 +95,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
 
     for i in range(1, depth):
         mix_hidden = fluid.layers.sums(input=[
-            fluid.layers.fc(input=input_tmp[0], size=hidden_dim),
-            fluid.layers.fc(input=input_tmp[1], size=hidden_dim)
+            fluid.layers.fc(input=input_tmp[0], size=hidden_dim, act='tanh'),
+            fluid.layers.fc(input=input_tmp[1], size=hidden_dim, act='tanh')
         ])
 
         lstm = fluid.layers.dynamic_lstm(
@@ -109,8 +110,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
         input_tmp = [mix_hidden, lstm]
 
     feature_out = fluid.layers.sums(input=[
-        fluid.layers.fc(input=input_tmp[0], size=label_dict_len),
-        fluid.layers.fc(input=input_tmp[1], size=label_dict_len)
+        fluid.layers.fc(input=input_tmp[0], size=label_dict_len, act='tanh'),
+        fluid.layers.fc(input=input_tmp[1], size=label_dict_len, act='tanh')
     ])
 
     return feature_out
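The three hunks above all make the same change: each branch that used to be a plain linear `fc` now applies a tanh non-linearity before the `sums` layer adds the branches elementwise. Below is a minimal NumPy sketch of that arithmetic only (it is not the Fluid API; the names `x0`, `x1`, `W0`, `W1`, `b0`, `b1` and the batch size are illustrative assumptions):

```python
# Conceptual sketch of one mix_hidden step: tanh(x @ W + b) per branch,
# then an elementwise sum of the branch outputs.
import numpy as np

hidden_dim = 512
x0 = np.random.randn(4, hidden_dim)          # stands in for input_tmp[0]
x1 = np.random.randn(4, hidden_dim)          # stands in for input_tmp[1]
W0, b0 = np.random.randn(hidden_dim, hidden_dim), np.zeros(hidden_dim)
W1, b1 = np.random.randn(hidden_dim, hidden_dim), np.zeros(hidden_dim)

# before this patch: linear branches, x @ W + b
# after this patch:  tanh-activated branches, summed elementwise
mix_hidden = np.tanh(x0 @ W0 + b0) + np.tanh(x1 @ W1 + b1)
print(mix_hidden.shape)                      # (4, 512)
```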
@@ -171,7 +172,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
    # check other optimizers and check why out will be NAN
    sgd_optimizer = fluid.optimizer.SGD(
        learning_rate=fluid.layers.exponential_decay(
-            learning_rate=0.0001,
+            learning_rate=0.01,
            decay_steps=100000,
            decay_rate=0.5,
            staircase=True))
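For reference, a rough sketch of how the retuned schedule behaves, assuming `exponential_decay` with `staircase=True` follows the usual rule `lr * decay_rate ** floor(step / decay_steps)` (this is only an illustration of the new base rate of 0.01, not the Fluid implementation):

```python
# Staircase exponential decay with the values set in this hunk.
import math

def decayed_lr(step, base_lr=0.01, decay_steps=100000, decay_rate=0.5):
    return base_lr * decay_rate ** math.floor(step / decay_steps)

for step in (0, 50000, 100000, 200000, 400000):
    print(step, decayed_lr(step))   # 0.01, 0.01, 0.005, 0.0025, 0.000625
```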
@@ -233,7 +234,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
                     print("second per batch: " + str((time.time(
                     ) - start_time) / batch_id))
                 # Set the threshold low to speed up the CI test
-                if float(pass_precision) > 0.05:
+                if float(pass_precision) > 0.01:
                     if save_dirname is not None:
                         # TODO(liuyiqun): Change the target to crf_decode
                         fluid.io.save_inference_model(save_dirname, [