commit
bd4ec1b493
@ -1,9 +1,9 @@
|
||||
# Use ccache, if the ccache program is found, to speed up recompilation.
find_program(CCACHE_PATH ccache)
if(CCACHE_PATH)
    # Fixed message grammar ("is founded" -> "found").
    message(STATUS "Ccache found, use ccache to speed up compile.")
    # Launch every compile and link command through the located ccache binary.
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH})
    set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH})
endif(CCACHE_PATH)
|
||||
|
||||
@ -0,0 +1,74 @@
|
||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle.v2 as paddle
|
||||
|
||||
__all__ = ['resnet_cifar10']
|
||||
|
||||
|
||||
def conv_bn_layer(input,
                  ch_out,
                  filter_size,
                  stride,
                  padding,
                  active_type=paddle.activation.Relu(),
                  ch_in=None):
    """Convolution followed by batch normalization.

    The convolution itself has no bias and a linear activation; the
    activation `active_type` is applied after batch norm instead.
    """
    conv = paddle.layer.img_conv(
        input=input,
        num_channels=ch_in,
        num_filters=ch_out,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        bias_attr=False,
        act=paddle.activation.Linear())
    return paddle.layer.batch_norm(input=conv, act=active_type)
|
||||
|
||||
|
||||
def shortcut(ipt, n_in, n_out, stride):
    """Residual shortcut connection.

    Identity when the channel counts already match; otherwise a 1x1
    linear conv+BN projection to n_out channels with the given stride.
    """
    if n_in == n_out:
        return ipt
    return conv_bn_layer(ipt, n_out, 1, stride, 0,
                         paddle.activation.Linear())
|
||||
|
||||
|
||||
def basicblock(ipt, ch_out, stride):
    """Two-conv residual block: conv-BN-ReLU, conv-BN, add shortcut, ReLU."""
    # NOTE(review): assumes the incoming feature map has ch_out * 2
    # channels — confirm against how layer_warp feeds this block.
    ch_in = ch_out * 2
    conv1 = conv_bn_layer(ipt, ch_out, 3, stride, 1)
    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, paddle.activation.Linear())
    residual = shortcut(ipt, ch_in, ch_out, stride)
    return paddle.layer.addto(
        input=[conv2, residual], act=paddle.activation.Relu())
|
||||
|
||||
|
||||
def layer_warp(block_func, ipt, features, count, stride):
    """Stack `count` copies of `block_func`.

    Only the first block receives `stride` (for downsampling); the
    remaining `count - 1` blocks use stride 1.
    """
    out = block_func(ipt, features, stride)
    for _ in range(count - 1):
        out = block_func(out, features, 1)
    return out
|
||||
|
||||
|
||||
def resnet_cifar10(ipt, depth=32):
    """Build a CIFAR-10 ResNet feature extractor of the given depth.

    depth must be of the form 6n + 2 (i.e. one of 20, 32, 44, 56, 110,
    1202) so each of the three residual groups gets n basic blocks.
    Returns the globally average-pooled feature layer.
    """
    assert (depth - 2) % 6 == 0, \
        'depth should be one of 20, 32, 44, 56, 110, 1202'
    # Floor division keeps `n` an integer on Python 3 as well; on
    # Python 2 int/int already truncated, so behavior is unchanged.
    # (The unused `nStages` set literal was removed.)
    n = (depth - 2) // 6
    conv1 = conv_bn_layer(
        ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
    # Three residual groups of 16/32/64 filters; the last two groups
    # downsample spatially with stride 2.
    res1 = layer_warp(basicblock, conv1, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 64, n, 2)
    # Global 8x8 average pooling to one value per channel.
    pool = paddle.layer.img_pool(
        input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
    return pool
|
||||
@ -0,0 +1,92 @@
|
||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License
|
||||
|
||||
import sys
|
||||
|
||||
import paddle.v2 as paddle
|
||||
|
||||
from api_v2_vgg import vgg_bn_drop
|
||||
|
||||
|
||||
def main():
    """Train an image classifier (VGG by default) on CIFAR-10."""
    # CIFAR-10 images: 3 channels x 32 x 32 pixels, 10 classes.
    datadim = 3 * 32 * 32
    classdim = 10

    # PaddlePaddle init
    paddle.init(use_gpu=False, trainer_count=1)

    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(datadim))

    # Add neural network config
    # option 1. resnet
    # net = resnet_cifar10(image, depth=32)
    # option 2. vgg
    net = vgg_bn_drop(image)

    # Final softmax classification layer over the 10 classes.
    out = paddle.layer.fc(input=net,
                          size=classdim,
                          act=paddle.activation.Softmax())

    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(classdim))
    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)

    # Create optimizer.  Rates are scaled by the batch size of 128;
    # 'discexp' applies discrete exponential learning-rate decay.
    momentum_optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
        learning_rate=0.1 / 128.0,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=50000 * 100,
        learning_rate_schedule='discexp',
        batch_size=128)

    # End batch and end pass event handler: logs cost every 100 batches
    # (dots in between) and evaluates on the test set after each pass.
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                reader=paddle.batch(
                    paddle.dataset.cifar.test10(), batch_size=128),
                feeding={'image': 0,
                         'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    # Create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=momentum_optimizer)
    # Train for 5 passes over shuffled CIFAR-10 training data.
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.cifar.train10(), buf_size=50000),
            batch_size=128),
        num_passes=5,
        event_handler=event_handler,
        feeding={'image': 0,
                 'label': 1})


if __name__ == '__main__':
    main()
|
||||
@ -0,0 +1,47 @@
|
||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle.v2 as paddle
|
||||
|
||||
__all__ = ['vgg_bn_drop']
|
||||
|
||||
|
||||
def vgg_bn_drop(input):
    """VGG network with batch normalization and dropout.

    Five conv stages (64, 128, 256, 512, 512 filters) each ending in a
    2x2 max pool, followed by dropout, an FC-BN-ReLU block and a final
    linear FC layer of size 512.
    """

    def stage(ipt, filters, groups, dropouts, channels=None):
        # One VGG stage: `groups` 3x3 conv+BN(+dropout) layers, then pool.
        return paddle.networks.img_conv_group(
            input=ipt,
            num_channels=channels,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[filters] * groups,
            conv_filter_size=3,
            conv_act=paddle.activation.Relu(),
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type=paddle.pooling.Max())

    # First stage names its 3 input channels explicitly; the rest infer.
    tmp = stage(input, 64, 2, [0.3, 0], 3)
    for filters, groups, dropouts in ((128, 2, [0.4, 0]),
                                      (256, 3, [0.4, 0.4, 0]),
                                      (512, 3, [0.4, 0.4, 0]),
                                      (512, 3, [0.4, 0.4, 0])):
        tmp = stage(tmp, filters, groups, dropouts)

    tmp = paddle.layer.dropout(input=tmp, dropout_rate=0.5)
    tmp = paddle.layer.fc(input=tmp, size=512, act=paddle.activation.Linear())
    tmp = paddle.layer.batch_norm(
        input=tmp,
        act=paddle.activation.Relu(),
        layer_attr=paddle.attr.Extra(drop_rate=0.5))
    return paddle.layer.fc(input=tmp, size=512, act=paddle.activation.Linear())
|
||||
@ -0,0 +1,58 @@
|
||||
import paddle.v2 as paddle
|
||||
import paddle.v2.dataset.uci_housing as uci_housing
|
||||
|
||||
|
||||
def main():
    """Train linear regression on the UCI housing dataset (13 features)."""
    # init
    paddle.init(use_gpu=False, trainer_count=1)

    # network config: a single linear FC unit, i.e. y_predict = w*x + b.
    x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
    y_predict = paddle.layer.fc(input=x,
                                param_attr=paddle.attr.Param(name='w'),
                                size=1,
                                act=paddle.activation.Linear(),
                                bias_attr=paddle.attr.Param(name='b'))
    y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
    cost = paddle.layer.mse_cost(input=y_predict, label=y)

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer (plain SGD: momentum set to 0)
    optimizer = paddle.optimizer.Momentum(momentum=0)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer)

    # event_handler to print training and testing info: logs cost every
    # 100 batches and runs the test set every 10th pass.
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f" % (
                    event.pass_id, event.batch_id, event.cost)

        if isinstance(event, paddle.event.EndPass):
            if (event.pass_id + 1) % 10 == 0:
                result = trainer.test(
                    reader=paddle.batch(
                        uci_housing.test(), batch_size=2),
                    feeding={'x': 0,
                             'y': 1})
                print "Test %d, %.2f" % (event.pass_id, result.cost)

    # training: 30 passes over the shuffled training data.
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                uci_housing.train(), buf_size=500),
            batch_size=2),
        feeding={'x': 0,
                 'y': 1},
        event_handler=event_handler,
        num_passes=30)


if __name__ == '__main__':
    main()
|
||||
@ -0,0 +1,137 @@
|
||||
import paddle.v2 as paddle
|
||||
import gzip
|
||||
|
||||
|
||||
def softmax_regression(img):
    """Softmax regression: a single FC layer mapping pixels to 10 classes."""
    return paddle.layer.fc(
        input=img, size=10, act=paddle.activation.Softmax())
|
||||
|
||||
|
||||
def multilayer_perceptron(img):
    """MLP classifier: two ReLU hidden layers (128, 64) and a 10-way softmax.

    The output size is 10 because MNIST has ten digit classes.
    """
    fc1 = paddle.layer.fc(
        input=img, size=128, act=paddle.activation.Relu())
    fc2 = paddle.layer.fc(
        input=fc1, size=64, act=paddle.activation.Relu())
    return paddle.layer.fc(
        input=fc2, size=10, act=paddle.activation.Softmax())
|
||||
|
||||
|
||||
def convolutional_neural_network(img):
    """CNN classifier: two conv+pool stages, a Tanh FC layer, 10-way softmax."""
    # First conv stage: 1 input channel -> 20 feature maps, 5x5 filters,
    # followed by 2x2 pooling.
    stage1 = paddle.networks.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
        num_channel=1,
        pool_size=2,
        pool_stride=2,
        act=paddle.activation.Tanh())
    # Second conv stage: 20 -> 50 feature maps, same filter/pool geometry.
    stage2 = paddle.networks.simple_img_conv_pool(
        input=stage1,
        filter_size=5,
        num_filters=50,
        num_channel=20,
        pool_size=2,
        pool_stride=2,
        act=paddle.activation.Tanh())
    # Fully-connected hidden layer.
    hidden = paddle.layer.fc(
        input=stage2, size=128, act=paddle.activation.Tanh())
    # Output size is 10: one unit per MNIST digit class.
    return paddle.layer.fc(
        input=hidden, size=10, act=paddle.activation.Softmax())
|
||||
|
||||
|
||||
def main():
    """Train an MNIST digit classifier, checkpointing to params.tar.gz,
    then run inference on 100 test images."""
    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology: 28x28 = 784 flattened pixels, 10 classes.
    images = paddle.layer.data(
        name='pixel', type=paddle.data_type.dense_vector(784))
    label = paddle.layer.data(
        name='label', type=paddle.data_type.integer_value(10))

    # Here we can build the prediction network in different ways. Please
    # choose one by uncomment corresponding line.
    predict = softmax_regression(images)
    #predict = multilayer_perceptron(images)
    #predict = convolutional_neural_network(images)

    cost = paddle.layer.classification_cost(input=predict, label=label)

    # Resume from a previous checkpoint if one exists; otherwise start
    # from freshly created parameters.
    try:
        with gzip.open('params.tar.gz', 'r') as f:
            parameters = paddle.parameters.Parameters.from_tar(f)
    except IOError:
        parameters = paddle.parameters.create(cost)

    optimizer = paddle.optimizer.Momentum(
        learning_rate=0.1 / 128.0,
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer)

    # (pass_id, test cost, classification error) per pass, used below to
    # pick the best pass.
    lists = []

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 1000 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)

                # Snapshot parameters every 1000 batches.
                with gzip.open('params.tar.gz', 'w') as f:
                    parameters.to_tar(f)

        elif isinstance(event, paddle.event.EndPass):
            result = trainer.test(reader=paddle.batch(
                paddle.dataset.mnist.test(), batch_size=128))
            print "Test with Pass %d, Cost %f, %s\n" % (
                event.pass_id, result.cost, result.metrics)
            lists.append((event.pass_id, result.cost,
                          result.metrics['classification_error_evaluator']))

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=128),
        event_handler=event_handler,
        num_passes=100)

    # find the best pass (lowest test cost).
    # NOTE(review): the lambda parameter shadows the builtin `list`.
    best = sorted(lists, key=lambda list: float(list[1]))[0]
    print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
    print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)

    # Collect the first 100 test images (features only, no labels).
    test_creator = paddle.dataset.mnist.test()
    test_data = []
    for item in test_creator():
        test_data.append((item[0], ))
        if len(test_data) == 100:
            break

    # output is a softmax layer. It returns probabilities.
    # Shape should be (100, 10)
    probs = paddle.infer(
        output_layer=predict, parameters=parameters, input=test_data)
    print probs.shape


if __name__ == '__main__':
    main()
|
||||
@ -0,0 +1,125 @@
|
||||
import paddle.v2 as paddle
|
||||
import cPickle
|
||||
import copy
|
||||
|
||||
|
||||
def main():
    """Train a MovieLens rating model that scores user/movie pairs via
    cosine similarity of two 200-d feature towers, then infer one pair."""
    paddle.init(use_gpu=False)
    movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()

    # --- User tower: id, gender, age and job embeddings ---
    uid = paddle.layer.data(
        name='user_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_user_id() + 1))
    usr_emb = paddle.layer.embedding(input=uid, size=32)

    usr_gender_id = paddle.layer.data(
        name='gender_id', type=paddle.data_type.integer_value(2))
    usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)

    usr_age_id = paddle.layer.data(
        name='age_id',
        type=paddle.data_type.integer_value(
            len(paddle.dataset.movielens.age_table)))
    usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)

    usr_job_id = paddle.layer.data(
        name='job_id',
        type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
        ) + 1))

    usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)

    # Fuse all user features into one 200-d vector.
    usr_combined_features = paddle.layer.fc(
        input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
        size=200,
        act=paddle.activation.Tanh())

    # --- Movie tower: id embedding, category vector, title conv-pool ---
    mov_id = paddle.layer.data(
        name='movie_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_movie_id() + 1))
    mov_emb = paddle.layer.embedding(input=mov_id, size=32)

    mov_categories = paddle.layer.data(
        name='category_id',
        type=paddle.data_type.sparse_binary_vector(
            len(paddle.dataset.movielens.movie_categories())))

    mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)

    # Title is a word-id sequence fed through embedding + text conv-pool.
    mov_title_id = paddle.layer.data(
        name='movie_title',
        type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
    mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
    mov_title_conv = paddle.networks.sequence_conv_pool(
        input=mov_title_emb, hidden_size=32, context_len=3)

    # Fuse all movie features into one 200-d vector.
    mov_combined_features = paddle.layer.fc(
        input=[mov_emb, mov_categories_hidden, mov_title_conv],
        size=200,
        act=paddle.activation.Tanh())

    # Scaled cosine similarity of the two towers, trained with MSE
    # against the ground-truth score.
    inference = paddle.layer.cos_sim(
        a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
    cost = paddle.layer.mse_cost(
        input=inference,
        label=paddle.layer.data(
            name='score', type=paddle.data_type.dense_vector(1)))

    parameters = paddle.parameters.create(cost)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=paddle.optimizer.Adam(
                                     learning_rate=1e-4))
    # Maps data-layer names to column indices of the dataset tuples.
    feeding = {
        'user_id': 0,
        'gender_id': 1,
        'age_id': 2,
        'job_id': 3,
        'movie_id': 4,
        'category_id': 5,
        'movie_title': 6,
        'score': 7
    }

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d Batch %d Cost %.2f" % (
                    event.pass_id, event.batch_id, event.cost)

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.movielens.train(), buf_size=8192),
            batch_size=256),
        event_handler=event_handler,
        feeding=feeding,
        num_passes=1)

    # --- Inference for one arbitrary user/movie pair ---
    user_id = 234
    movie_id = 345

    user = paddle.dataset.movielens.user_info()[user_id]
    movie = paddle.dataset.movielens.movie_info()[movie_id]

    feature = user.value() + movie.value()

    def reader():
        # Single-sample reader for inference.
        yield feature

    # Reuse the training feed map minus the 'score' label column.
    infer_dict = copy.copy(feeding)
    del infer_dict['score']

    prediction = paddle.infer(
        output=inference,
        parameters=parameters,
        reader=paddle.batch(
            reader, batch_size=32),
        feeding=infer_dict)
    # cos_sim output is scaled to [-5, 5]; map it back to a [0, 5] score.
    print(prediction + 5) / 2


if __name__ == '__main__':
    main()
|
||||
@ -0,0 +1,190 @@
|
||||
import sys
|
||||
import math
|
||||
import numpy as np
|
||||
import paddle.v2 as paddle
|
||||
import paddle.v2.dataset.conll05 as conll05
|
||||
|
||||
|
||||
def db_lstm():
    """Build a deep bidirectional LSTM network for semantic role labeling
    on CoNLL-2005 data.

    Returns a (crf_cost, crf_dec) pair: the CRF training cost layer and
    the CRF decoding layer, both sharing the 'crfw' parameter.
    """
    word_dict, verb_dict, label_dict = conll05.get_dict()
    word_dict_len = len(word_dict)
    label_dict_len = len(label_dict)
    pred_len = len(verb_dict)

    mark_dict_len = 2    # binary predicate-region mark
    word_dim = 32        # word / predicate embedding size
    mark_dim = 5         # mark embedding size
    hidden_dim = 512
    depth = 8            # number of stacked LSTM layers

    #8 features
    def d_type(size):
        # All inputs are integer id sequences.
        return paddle.data_type.integer_value_sequence(size)

    word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
    predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))

    # Context words at offsets -2..+2 around the predicate.
    ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
    ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
    ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
    ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
    ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))

    # Gold SRL label sequence.
    target = paddle.layer.data(name='target', type=d_type(label_dict_len))

    default_std = 1 / math.sqrt(hidden_dim) / 3.0

    # learning_rate=0. freezes the shared 'emb' word table during training.
    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)

    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(
            name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        size=mark_dim, input=mark, param_attr=std_0)

    # Word and context embeddings all share the 'emb' table.
    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        paddle.layer.embedding(
            size=word_dim, input=x, param_attr=emb_para) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    # Project all 8 embeddings into the first hidden layer.
    hidden_0 = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=std_default) for emb in emb_layers
        ])

    mix_hidden_lr = 1e-3
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)

    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=std_0,
        param_attr=lstm_para_attr)

    #stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    # Each layer mixes the previous hidden and LSTM outputs; direction
    # alternates via reverse=((i % 2) == 1).
    for i in range(1, depth):
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=std_default,
            input=[
                paddle.layer.full_matrix_projection(
                    input=input_tmp[0], param_attr=hidden_para_attr),
                paddle.layer.full_matrix_projection(
                    input=input_tmp[1], param_attr=lstm_para_attr)
            ])

        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            reverse=((i % 2) == 1),
            bias_attr=std_0,
            param_attr=lstm_para_attr)

        input_tmp = [mix_hidden, lstm]

    # Per-token scores over the label vocabulary.
    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=input_tmp[0], param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=input_tmp[1], param_attr=lstm_para_attr)
        ], )

    # CRF cost for training; decoding layer shares the same 'crfw' weights.
    crf_cost = paddle.layer.crf(size=label_dict_len,
                                input=feature_out,
                                label=target,
                                param_attr=paddle.attr.Param(
                                    name='crfw',
                                    initial_std=default_std,
                                    learning_rate=mix_hidden_lr))

    crf_dec = paddle.layer.crf_decoding(
        name='crf_dec_l',
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))

    return crf_cost, crf_dec
|
||||
|
||||
|
||||
def load_parameter(file_name, h, w):
    """Load an (h, w) float32 matrix from a binary parameter file.

    The file begins with a 16-byte header, which is skipped; the rest is
    raw little-endian float32 data read with numpy.
    """
    with open(file_name, 'rb') as fin:
        fin.read(16)  # discard the 16-byte file header
        values = np.fromfile(fin, dtype=np.float32)
    return values.reshape(h, w)
|
||||
|
||||
|
||||
def main():
    """Train the SRL db_lstm network on CoNLL-05 with a CRF objective."""
    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology
    crf_cost, crf_dec = db_lstm()

    # create parameters for both the cost and decoding layers.
    parameters = paddle.parameters.create([crf_cost, crf_dec])

    # create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0,
        learning_rate=2e-2,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(
            average_window=0.5, max_average_window=10000), )

    def event_handler(event):
        # Log training cost every 100 batches.
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)

    trainer = paddle.trainer.SGD(cost=crf_cost,
                                 parameters=parameters,
                                 update_equation=optimizer)
    # Initialize the frozen 'emb' table from a pretrained 44068 x 32 matrix.
    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

    # NOTE(review): this "training" reader draws from conll05.test() —
    # looks like the test split is used for training; confirm intended.
    trn_reader = paddle.batch(
        paddle.reader.shuffle(
            conll05.test(), buf_size=8192), batch_size=10)

    # Maps data-layer names to column indices of the dataset tuples.
    feeding = {
        'word_data': 0,
        'ctx_n2_data': 1,
        'ctx_n1_data': 2,
        'ctx_0_data': 3,
        'ctx_p1_data': 4,
        'ctx_p2_data': 5,
        'verb_data': 6,
        'mark_data': 7,
        'target': 8
    }

    trainer.train(
        reader=trn_reader,
        event_handler=event_handler,
        num_passes=10000,
        feeding=feeding)


if __name__ == '__main__':
    main()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue