commit d4327b6895

@@ -0,0 +1,74 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.v2 as paddle

__all__ = ['resnet_cifar10']


def conv_bn_layer(input,
                  ch_out,
                  filter_size,
                  stride,
                  padding,
                  active_type=paddle.activation.Relu(),
                  ch_in=None):
    tmp = paddle.layer.img_conv(
        input=input,
        filter_size=filter_size,
        num_channels=ch_in,
        num_filters=ch_out,
        stride=stride,
        padding=padding,
        act=paddle.activation.Linear(),
        bias_attr=False)
    return paddle.layer.batch_norm(input=tmp, act=active_type)


def shortcut(ipt, n_in, n_out, stride):
    if n_in != n_out:
        return conv_bn_layer(ipt, n_out, 1, stride, 0,
                             paddle.activation.Linear())
    else:
        return ipt


def basicblock(ipt, ch_out, stride):
    ch_in = ch_out * 2
    tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
    tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
    short = shortcut(ipt, ch_in, ch_out, stride)
    return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())


def layer_warp(block_func, ipt, features, count, stride):
    tmp = block_func(ipt, features, stride)
    for i in range(1, count):
        tmp = block_func(tmp, features, 1)
    return tmp


def resnet_cifar10(ipt, depth=32):
    # depth should be one of 20, 32, 44, 56, 110, 1202
    assert (depth - 2) % 6 == 0
    n = (depth - 2) / 6  # integer division under Python 2
    nStages = {16, 32, 64}  # channel widths of the three residual groups
    conv1 = conv_bn_layer(
        ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
    res1 = layer_warp(basicblock, conv1, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 64, n, 2)
    pool = paddle.layer.img_pool(
        input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
    return pool
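# A minimal usage sketch (for illustration only; in this commit the function
# is actually consumed by the CIFAR-10 trainer file below):
#
#   image = paddle.layer.data(
#       name="image", type=paddle.data_type.dense_vector(3 * 32 * 32))
#   net = resnet_cifar10(image, depth=32)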

@@ -0,0 +1,91 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
from api_v2_resnet import resnet_cifar10


def main():
    datadim = 3 * 32 * 32
    classdim = 10

    # PaddlePaddle init
    paddle.init(use_gpu=True, trainer_count=1)

    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(datadim))

    # Add neural network config
    # option 1. resnet
    net = resnet_cifar10(image, depth=32)
    # option 2. vgg
    # net = vgg_bn_drop(image)

    out = paddle.layer.fc(input=net,
                          size=classdim,
                          act=paddle.activation.Softmax())

    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(classdim))
    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)

    # Create optimizer
    momentum_optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
        learning_rate=0.1 / 128.0,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=50000 * 100,
        learning_rate_schedule='discexp',
        batch_size=128)
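    # Reading of the schedule above (an assumption based on the parameter
    # names, not stated in this commit): with 'discexp' (discrete exponential
    # decay) the learning rate becomes roughly
    #   lr = learning_rate * decay_a ** floor(num_samples / decay_b)
    # i.e. a 10x drop after every 100 passes over the 50000 training images.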

    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                reader=paddle.batch(
                    paddle.dataset.cifar.test10(), batch_size=128),
                reader_dict={'image': 0,
                             'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    # Create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=momentum_optimizer)
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.cifar.train10(), buf_size=50000),
            batch_size=128),
        num_passes=5,
        event_handler=event_handler,
        reader_dict={'image': 0,
                     'label': 1})
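    # reader_dict maps each data layer name to the index of the field in the
    # tuples the reader yields: cifar.train10() yields (image, label) pairs.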


if __name__ == '__main__':
    main()

@@ -0,0 +1,47 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.v2 as paddle

__all__ = ['vgg_bn_drop']


def vgg_bn_drop(input):
    def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
        return paddle.networks.img_conv_group(
            input=ipt,
            num_channels=num_channels,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act=paddle.activation.Relu(),
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type=paddle.pooling.Max())
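    # Each conv_block is a stack of `groups` 3x3 conv + batch-norm + ReLU
    # layers (with the given per-layer dropout rates) followed by a 2x2
    # max-pool, as assembled by paddle.networks.img_conv_group.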

    conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
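    # With 32x32 CIFAR input, the five 2x2 pools halve the spatial size each
    # time: 32 -> 16 -> 8 -> 4 -> 2 -> 1, leaving a 1x1x512 feature map.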

    drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
    fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
    bn = paddle.layer.batch_norm(
        input=fc1,
        act=paddle.activation.Relu(),
        layer_attr=paddle.attr.Extra(drop_rate=0.5))
    fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
    return fc2

@@ -0,0 +1,58 @@
import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing


def main():
    # init
    paddle.init(use_gpu=False, trainer_count=1)

    # network config
    x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
    y_predict = paddle.layer.fc(input=x,
                                param_attr=paddle.attr.Param(name='w'),
                                size=1,
                                act=paddle.activation.Linear(),
                                bias_attr=paddle.attr.Param(name='b'))
    y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
    cost = paddle.layer.regression_cost(input=y_predict, label=y)
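    # The model is plain linear regression: y_predict = w * x + b over the 13
    # UCI housing features, trained with the squared-error regression cost.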

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer
    optimizer = paddle.optimizer.Momentum(momentum=0)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer)

    # event_handler to print training and testing info
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)

        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                reader=paddle.reader.batched(
                    uci_housing.test(), batch_size=2),
                reader_dict={'x': 0,
                             'y': 1})
            if event.pass_id % 10 == 0:
                print "Test %d, %s" % (event.pass_id, result.metrics)

    # training
    trainer.train(
        reader=paddle.reader.batched(
            paddle.reader.shuffle(
                uci_housing.train(), buf_size=500),
            batch_size=2),
        reader_dict={'x': 0,
                     'y': 1},
        event_handler=event_handler,
        num_passes=30)


if __name__ == '__main__':
    main()

@@ -0,0 +1,190 @@
import sys
import math
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05


def db_lstm():
    word_dict, verb_dict, label_dict = conll05.get_dict()
    word_dict_len = len(word_dict)
    label_dict_len = len(label_dict)
    pred_len = len(verb_dict)

    mark_dict_len = 2
    word_dim = 32
    mark_dim = 5
    hidden_dim = 512
    depth = 8

    # 8 features
    def d_type(size):
        return paddle.data_type.integer_value_sequence(size)

    word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
    predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))

    ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
    ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
    ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
    ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
    ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))

    target = paddle.layer.data(name='target', type=d_type(label_dict_len))

    default_std = 1 / math.sqrt(hidden_dim) / 3.0

    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)

    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(
            name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        size=mark_dim, input=mark, param_attr=std_0)

    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        paddle.layer.embedding(
            size=word_dim, input=x, param_attr=emb_para) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0 = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=std_default) for emb in emb_layers
        ])

    mix_hidden_lr = 1e-3
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)

    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=std_0,
        param_attr=lstm_para_attr)

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]
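    # Each pass of the loop below adds one more hidden+LSTM pair; the
    # alternating reverse flag makes consecutive LSTM layers read the
    # sequence in opposite directions, giving a stacked bidirectional
    # network with "direct edges": every layer sees both the previous
    # hidden layer and the previous LSTM output.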
    for i in range(1, depth):
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=std_default,
            input=[
                paddle.layer.full_matrix_projection(
                    input=input_tmp[0], param_attr=hidden_para_attr),
                paddle.layer.full_matrix_projection(
                    input=input_tmp[1], param_attr=lstm_para_attr)
            ])

        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            reverse=((i % 2) == 1),
            bias_attr=std_0,
            param_attr=lstm_para_attr)

        input_tmp = [mix_hidden, lstm]

    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=input_tmp[0], param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=input_tmp[1], param_attr=lstm_para_attr)
        ])

    crf_cost = paddle.layer.crf(size=label_dict_len,
                                input=feature_out,
                                label=target,
                                param_attr=paddle.attr.Param(
                                    name='crfw',
                                    initial_std=default_std,
                                    learning_rate=mix_hidden_lr))

    crf_dec = paddle.layer.crf_decoding(
        name='crf_dec_l',
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))

    return crf_cost, crf_dec


def load_parameter(file_name, h, w):
    with open(file_name, 'rb') as f:
        f.read(16)  # skip header.
        return np.fromfile(f, dtype=np.float32).reshape(h, w)


def main():
    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology
    crf_cost, crf_dec = db_lstm()

    # create parameters
    parameters = paddle.parameters.create([crf_cost, crf_dec])

    # create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0,
        learning_rate=2e-2,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(
            average_window=0.5, max_average_window=10000), )

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)

    trainer = paddle.trainer.SGD(cost=crf_cost,
                                 parameters=parameters,
                                 update_equation=optimizer)
    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

    trn_reader = paddle.reader.batched(
        paddle.reader.shuffle(
            conll05.test(), buf_size=8192), batch_size=10)
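    # NOTE: the reader above draws from conll05.test(), so this demo trains
    # on the (smaller) CoNLL-05 test split.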

    reader_dict = {
        'word_data': 0,
        'ctx_n2_data': 1,
        'ctx_n1_data': 2,
        'ctx_0_data': 3,
        'ctx_p1_data': 4,
        'ctx_p2_data': 5,
        'verb_data': 6,
        'mark_data': 7,
        'target': 8
    }

    trainer.train(
        reader=trn_reader,
        event_handler=event_handler,
        num_passes=10000,
        reader_dict=reader_dict)


if __name__ == '__main__':
    main()

@@ -0,0 +1,166 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import paddle.trainer_config_helpers.attrs as attrs
from paddle.trainer_config_helpers.poolings import MaxPooling
import paddle.v2 as paddle


def convolution_net(input_dim,
                    class_dim=2,
                    emb_dim=128,
                    hid_dim=128,
                    is_predict=False):
    data = paddle.layer.data("word",
                             paddle.data_type.integer_value_sequence(input_dim))
    emb = paddle.layer.embedding(input=data, size=emb_dim)
    conv_3 = paddle.networks.sequence_conv_pool(
        input=emb, context_len=3, hidden_size=hid_dim)
    conv_4 = paddle.networks.sequence_conv_pool(
        input=emb, context_len=4, hidden_size=hid_dim)
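    # conv_3 and conv_4 are parallel convolution + max-pool branches over the
    # word embeddings, with context windows of 3 and 4 words; the fc layer
    # below concatenates and classifies their pooled features.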
    output = paddle.layer.fc(input=[conv_3, conv_4],
                             size=class_dim,
                             act=paddle.activation.Softmax())
    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
    cost = paddle.layer.classification_cost(input=output, label=lbl)
    return cost


def stacked_lstm_net(input_dim,
                     class_dim=2,
                     emb_dim=128,
                     hid_dim=512,
                     stacked_num=3,
                     is_predict=False):
    """
    A wrapper for the sentiment classification task.
    This network uses a bi-directional recurrent network consisting of
    three LSTM layers. The configuration follows the paper at the URL
    below, but uses fewer layers.
        http://www.aclweb.org/anthology/P15-1109

    input_dim: size of the word dictionary.
    class_dim: number of categories.
    emb_dim: dimension of the word embedding.
    hid_dim: dimension of the hidden layers.
    stacked_num: number of stacked lstm-hidden layers.
    is_predict: whether the network is used for prediction;
        some layers are not needed when predicting.
    """
    assert stacked_num % 2 == 1
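    # stacked_num must be odd so that, with reverse=(i % 2 == 0) in the loop
    # below, the stack alternates direction and the topmost LSTM runs forward.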

    layer_attr = attrs.ExtraLayerAttribute(drop_rate=0.5)
    fc_para_attr = attrs.ParameterAttribute(learning_rate=1e-3)
    lstm_para_attr = attrs.ParameterAttribute(initial_std=0., learning_rate=1.)
    para_attr = [fc_para_attr, lstm_para_attr]
    bias_attr = attrs.ParameterAttribute(initial_std=0., l2_rate=0.)
    relu = paddle.activation.Relu()
    linear = paddle.activation.Linear()

    data = paddle.layer.data("word",
                             paddle.data_type.integer_value_sequence(input_dim))
    emb = paddle.layer.embedding(input=data, size=emb_dim)

    fc1 = paddle.layer.fc(input=emb,
                          size=hid_dim,
                          act=linear,
                          bias_attr=bias_attr)
    lstm1 = paddle.layer.lstmemory(
        input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)

    inputs = [fc1, lstm1]
    for i in range(2, stacked_num + 1):
        fc = paddle.layer.fc(input=inputs,
                             size=hid_dim,
                             act=linear,
                             param_attr=para_attr,
                             bias_attr=bias_attr)
        lstm = paddle.layer.lstmemory(
            input=fc,
            reverse=(i % 2) == 0,
            act=relu,
            bias_attr=bias_attr,
            layer_attr=layer_attr)
        inputs = [fc, lstm]

    fc_last = paddle.layer.pooling(input=inputs[0], pooling_type=MaxPooling())
    lstm_last = paddle.layer.pooling(input=inputs[1], pooling_type=MaxPooling())
    output = paddle.layer.fc(input=[fc_last, lstm_last],
                             size=class_dim,
                             act=paddle.activation.Softmax(),
                             bias_attr=bias_attr,
                             param_attr=para_attr)

    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
    cost = paddle.layer.classification_cost(input=output, label=lbl)
    return cost


if __name__ == '__main__':
    # init
    paddle.init(use_gpu=True, trainer_count=4)

    # network config
    print 'load dictionary...'
    word_dict = paddle.dataset.imdb.word_dict()
    dict_dim = len(word_dict)
    class_dim = 2

    # Please choose the way to build the network
    # by uncommenting the corresponding line.
    cost = convolution_net(dict_dim, class_dim=class_dim)
    # cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=2e-3,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(average_window=0.5))

    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                reader=paddle.reader.batched(
                    lambda: paddle.dataset.imdb.test(word_dict),
                    batch_size=128),
                reader_dict={'word': 0,
                             'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    # create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=adam_optimizer)

    trainer.train(
        reader=paddle.reader.batched(
            paddle.reader.shuffle(
                lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
            batch_size=100),
        event_handler=event_handler,
        reader_dict={'word': 0,
                     'label': 1},
        num_passes=10)

@@ -0,0 +1,110 @@
import os

import paddle.v2 as paddle

from seqToseq_net_v2 import seqToseq_net_v2

# Data definition.
# TODO: This code should be merged into the dataset package.
data_dir = "./data/pre-wmt14"
src_lang_dict = os.path.join(data_dir, 'src.dict')
trg_lang_dict = os.path.join(data_dir, 'trg.dict')

source_dict_dim = len(open(src_lang_dict, "r").readlines())
target_dict_dim = len(open(trg_lang_dict, "r").readlines())


def read_to_dict(dict_path):
    with open(dict_path, "r") as fin:
        out_dict = {
            line.strip(): line_count
            for line_count, line in enumerate(fin)
        }
    return out_dict


src_dict = read_to_dict(src_lang_dict)
trg_dict = read_to_dict(trg_lang_dict)

train_list = os.path.join(data_dir, 'train.list')
test_list = os.path.join(data_dir, 'test.list')

UNK_IDX = 2
START = "<s>"
END = "<e>"


def _get_ids(s, dictionary):
    words = s.strip().split()
    return [dictionary[START]] + \
           [dictionary.get(w, UNK_IDX) for w in words] + \
           [dictionary[END]]
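    # Worked example (hypothetical ids): with
    # dictionary = {"<s>": 0, "<e>": 1, "hello": 3},
    # _get_ids("hello world", dictionary) returns [0, 3, 2, 1]:
    # start marker, "hello", UNK_IDX for the unseen "world", end marker.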


def train_reader(file_name):
    def reader():
        with open(file_name, 'r') as f:
            for line_count, line in enumerate(f):
                line_split = line.strip().split('\t')
                if len(line_split) != 2:
                    continue
                src_seq = line_split[0]  # one source sequence
                src_ids = _get_ids(src_seq, src_dict)

                trg_seq = line_split[1]  # one target sequence
                trg_words = trg_seq.split()
                trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words]

                # drop sequences longer than 80 tokens in training mode
                if len(src_ids) > 80 or len(trg_ids) > 80:
                    continue
                trg_ids_next = trg_ids + [trg_dict[END]]
                trg_ids = [trg_dict[START]] + trg_ids

                yield src_ids, trg_ids, trg_ids_next

    return reader


def main():
    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology
    cost = seqToseq_net_v2(source_dict_dim, target_dict_dim)
    parameters = paddle.parameters.create(cost)

    # define optimize method and trainer
    optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer)

    # define data reader
    reader_dict = {
        'source_language_word': 0,
        'target_language_word': 1,
        'target_language_next_word': 2
    }

    wmt14_reader = paddle.reader.batched(
        paddle.reader.shuffle(
            train_reader("data/pre-wmt14/train/train"), buf_size=8192),
        batch_size=5)

    # define event_handler callback
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 10 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)

    # start to train
    trainer.train(
        reader=wmt14_reader,
        event_handler=event_handler,
        num_passes=10000,
        reader_dict=reader_dict)


if __name__ == '__main__':
    main()

@@ -0,0 +1,92 @@
import paddle.v2 as paddle


def seqToseq_net_v2(source_dict_dim, target_dict_dim):
    ### Network Architecture
    word_vector_dim = 512  # dimension of word vector
    decoder_size = 512  # dimension of hidden unit in GRU Decoder network
    encoder_size = 512  # dimension of hidden unit in GRU Encoder network

    #### Encoder
    src_word_id = paddle.layer.data(
        name='source_language_word',
        type=paddle.data_type.integer_value_sequence(source_dict_dim))
    src_embedding = paddle.layer.embedding(
        input=src_word_id,
        size=word_vector_dim,
        param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
    src_forward = paddle.networks.simple_gru(
        input=src_embedding, size=encoder_size)
    src_backward = paddle.networks.simple_gru(
        input=src_embedding, size=encoder_size, reverse=True)
    encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
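    # The encoder is a bidirectional GRU: the forward and backward passes
    # each produce encoder_size (512) features per word, so encoded_vector
    # carries 2 * encoder_size = 1024 dimensions per time step.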

    #### Decoder
    with paddle.layer.mixed(size=decoder_size) as encoded_proj:
        encoded_proj += paddle.layer.full_matrix_projection(
            input=encoded_vector)

    backward_first = paddle.layer.first_seq(input=src_backward)

    with paddle.layer.mixed(
            size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
        decoder_boot += paddle.layer.full_matrix_projection(
            input=backward_first)

    def gru_decoder_with_attention(enc_vec, enc_proj, current_word):

        decoder_mem = paddle.layer.memory(
            name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)

        context = paddle.networks.simple_attention(
            encoded_sequence=enc_vec,
            encoded_proj=enc_proj,
            decoder_state=decoder_mem)

        with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
            decoder_inputs += paddle.layer.full_matrix_projection(input=context)
            decoder_inputs += paddle.layer.full_matrix_projection(
                input=current_word)

        gru_step = paddle.layer.gru_step(
            name='gru_decoder',
            input=decoder_inputs,
            output_mem=decoder_mem,
            size=decoder_size)

        with paddle.layer.mixed(
                size=target_dict_dim,
                bias_attr=True,
                act=paddle.activation.Softmax()) as out:
            out += paddle.layer.full_matrix_projection(input=gru_step)
        return out

    decoder_group_name = "decoder_group"
    group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
    group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
    group_inputs = [group_input1, group_input2]

    trg_embedding = paddle.layer.embedding(
        input=paddle.layer.data(
            name='target_language_word',
            type=paddle.data_type.integer_value_sequence(target_dict_dim)),
        size=word_vector_dim,
        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
    group_inputs.append(trg_embedding)

    # For a decoder equipped with an attention mechanism, in training the
    # target embedding (the ground truth) is the data input, while the
    # encoded source sequence is accessed as an unbounded memory.
    # Here, the StaticInput defines a read-only memory
    # for the recurrent_group.
    decoder = paddle.layer.recurrent_group(
        name=decoder_group_name,
        step=gru_decoder_with_attention,
        input=group_inputs)

    lbl = paddle.layer.data(
        name='target_language_next_word',
        type=paddle.data_type.integer_value_sequence(target_dict_dim))
    cost = paddle.layer.classification_cost(input=decoder, label=lbl)

    return cost

@@ -0,0 +1,80 @@
import math

import paddle.v2 as paddle

dictsize = 1953
embsize = 32
hiddensize = 256
N = 5
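# N is the n-gram window size: the model reads N - 1 = 4 context words and
# predicts the fifth (the imikolov readers yield N-word tuples).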


def wordemb(inlayer):
    wordemb = paddle.layer.table_projection(
        input=inlayer,
        size=embsize,
        param_attr=paddle.attr.Param(
            name="_proj",
            initial_std=0.001,
            learning_rate=1,
            l2_rate=0, ))
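    # All five word slots call wordemb(), and because each table_projection
    # uses the same parameter name "_proj", they share one embedding table.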
    return wordemb


def main():
    paddle.init(use_gpu=False, trainer_count=1)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                result = trainer.test(
                    paddle.batch(
                        paddle.dataset.imikolov.test(word_dict, N), 32))
                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics)

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
    trainer.train(
        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
        num_passes=30,
        event_handler=event_handler)


if __name__ == '__main__':
    main()

@@ -1,37 +1,26 @@
-API Chinese Manual
-==================
+API
+===

-DataProvider API
-----------------
+Model Config API
+----------------

 .. toctree::
   :maxdepth: 1

-  data_provider/dataprovider_cn.rst
-  data_provider/pydataprovider2_cn.rst
+  v2/model_configs.rst

-.. _api_trainer_config:
-
-Model Config API
-----------------
+Data API
+--------

 .. toctree::
   :maxdepth: 1

-  trainer_config_helpers/optimizers.rst
-  trainer_config_helpers/data_sources.rst
-  trainer_config_helpers/layers.rst
-  trainer_config_helpers/activations.rst
-  trainer_config_helpers/poolings.rst
-  trainer_config_helpers/networks.rst
-  trainer_config_helpers/evaluators.rst
-  trainer_config_helpers/attrs.rst
+  v2/data.rst

-Applications API
-----------------
+Train API
+---------

-.. toctree::
-  :maxdepth: 1
+.. toctree::
+  :maxdepth: 1

-  predict/swig_py_paddle_cn.rst
+  v2/run_logic.rst

@@ -1,37 +1,26 @@
 API
 ===

-DataProvider API
+Model Config API
 ----------------

 .. toctree::
   :maxdepth: 1

-  data_provider/dataprovider_en.rst
-  data_provider/pydataprovider2_en.rst
-
-.. _api_trainer_config:
+  v2/model_configs.rst

-Model Config API
-----------------
+Data API
+--------

 .. toctree::
   :maxdepth: 1

-  trainer_config_helpers/optimizers.rst
-  trainer_config_helpers/data_sources.rst
-  trainer_config_helpers/layers.rst
-  trainer_config_helpers/activations.rst
-  trainer_config_helpers/poolings.rst
-  trainer_config_helpers/networks.rst
-  trainer_config_helpers/evaluators.rst
-  trainer_config_helpers/attrs.rst
+  v2/data.rst

-Applications API
-----------------
+Train API
+---------

-.. toctree::
-  :maxdepth: 1
+.. toctree::
+  :maxdepth: 1

-  predict/swig_py_paddle_en.rst
+  v2/run_logic.rst