commit
515543ab2f
@ -0,0 +1,3 @@
|
|||||||
|
[submodule "book"]
|
||||||
|
path = book
|
||||||
|
url = https://github.com/PaddlePaddle/book.git
|
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 22ed2a01aee872f055b5f5f212428f481cefc10d
|
@ -0,0 +1,74 @@
|
|||||||
|
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
|
||||||
|
__all__ = ['resnet_cifar10']
|
||||||
|
|
||||||
|
|
||||||
|
def conv_bn_layer(input,
                  ch_out,
                  filter_size,
                  stride,
                  padding,
                  active_type=paddle.activation.Relu(),
                  ch_in=None):
    """Stack an image convolution and a batch-norm layer.

    The convolution is created with a linear activation and without a bias
    term; ``active_type`` is applied by the batch-norm layer instead.

    Args:
        input: layer feeding the convolution.
        ch_out: number of convolution filters.
        filter_size: convolution filter size.
        stride: convolution stride.
        padding: convolution padding.
        active_type: activation handed to the batch-norm layer.
        ch_in: number of input channels, forwarded as ``num_channels``.

    Returns:
        The batch-norm layer built on top of the convolution.
    """
    linear_conv = paddle.layer.img_conv(
        input=input,
        num_channels=ch_in,
        num_filters=ch_out,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        bias_attr=False,
        act=paddle.activation.Linear())
    normalized = paddle.layer.batch_norm(input=linear_conv, act=active_type)
    return normalized
|
||||||
|
|
||||||
|
|
||||||
|
def shortcut(ipt, n_in, n_out, stride):
    """Return the shortcut branch of a residual block.

    When the channel counts match, the input layer is returned untouched.
    Otherwise the input goes through a 1x1 conv + batch-norm projection
    (linear activation, no padding) producing ``n_out`` channels with the
    given ``stride``.
    """
    if n_in == n_out:
        return ipt
    projection_act = paddle.activation.Linear()
    return conv_bn_layer(ipt, n_out, 1, stride, 0, projection_act)
|
||||||
|
|
||||||
|
|
||||||
|
def basicblock(ipt, ch_out, stride):
    """Build one residual block: two 3x3 conv+BN layers plus a shortcut.

    The first convolution uses ``stride`` and the default activation of
    ``conv_bn_layer``; the second uses stride 1 and a linear activation.
    The two branches are summed and passed through ReLU.

    The shortcut is told the input has ``ch_out * 2`` channels, so for any
    non-zero ``ch_out`` it takes the projection branch.
    """
    first = conv_bn_layer(ipt, ch_out, 3, stride, 1)
    second = conv_bn_layer(first, ch_out, 3, 1, 1,
                           paddle.activation.Linear())
    skip = shortcut(ipt, ch_out * 2, ch_out, stride)
    return paddle.layer.addto(
        input=[second, skip], act=paddle.activation.Relu())
|
||||||
|
|
||||||
|
|
||||||
|
def layer_warp(block_func, ipt, features, count, stride):
    """Chain ``block_func`` blocks into one stage.

    The first block is always built and receives ``stride``; it is followed
    by ``count - 1`` further blocks, each called with stride 1.

    Args:
        block_func: callable ``(input, features, stride) -> layer``.
        ipt: input of the first block.
        features: feature count handed to every block.
        count: total number of blocks in the stage.
        stride: stride of the first block.

    Returns:
        The output of the last block.
    """
    out = block_func(ipt, features, stride)
    for _ in range(count - 1):
        out = block_func(out, features, 1)
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def resnet_cifar10(ipt, depth=32):
    """Build the ResNet feature extractor used for CIFAR-10.

    The network is a 3x3 conv+BN stem with 16 filters followed by three
    stages of ``(depth - 2) // 6`` basic blocks each (16, 32 and 64
    features; the last two stages start with stride 2) and an 8x8 average
    pooling layer.

    Args:
        ipt: input image layer; the stem is created with 3 input channels.
        depth: network depth, must satisfy ``(depth - 2) % 6 == 0``.

    Returns:
        The average-pooling layer on top of the last stage.
    """
    # depth should be one of 20, 32, 44, 56, 110, 1202
    assert (depth - 2) % 6 == 0
    # Floor division keeps the block count an int even when true division is
    # in effect; layer_warp feeds it to range().
    n = (depth - 2) // 6
    conv1 = conv_bn_layer(
        ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
    res1 = layer_warp(basicblock, conv1, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 64, n, 2)
    pool = paddle.layer.img_pool(
        input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
    return pool
|
@ -0,0 +1,92 @@
|
|||||||
|
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
|
||||||
|
from api_v2_vgg import vgg_bn_drop
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Build a CIFAR-10 classifier on top of ``vgg_bn_drop`` and train it.

    Runs on CPU with a single trainer for 5 passes. The cost is printed
    every 100 batches (a dot is written for the other batches) and the
    test-set metrics are printed at the end of every pass.
    """
    datadim = 3 * 32 * 32
    classdim = 10
    # Position of each data layer's value inside a reader sample.
    feeding = {'image': 0, 'label': 1}

    # PaddlePaddle init
    paddle.init(use_gpu=False, trainer_count=1)

    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(datadim))

    # Add neural network config
    # option 1. resnet
    # net = resnet_cifar10(image, depth=32)
    # option 2. vgg
    net = vgg_bn_drop(image)

    out = paddle.layer.fc(input=net,
                          size=classdim,
                          act=paddle.activation.Softmax())

    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(classdim))
    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)

    # Create optimizer
    momentum_optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
        learning_rate=0.1 / 128.0,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=50000 * 100,
        learning_rate_schedule='discexp',
        batch_size=128)

    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                # Parenthesised single-argument print behaves the same as a
                # Python 2 print statement and is valid Python 3 syntax.
                print("\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            # `trainer` is bound below, before any event can fire.
            result = trainer.test(
                reader=paddle.batch(
                    paddle.dataset.cifar.test10(), batch_size=128),
                feeding=feeding)
            print("\nTest with Pass %d, %s" % (event.pass_id, result.metrics))

    # Create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=momentum_optimizer)
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.cifar.train10(), buf_size=50000),
            batch_size=128),
        num_passes=5,
        event_handler=event_handler,
        feeding=feeding)
|
||||||
|
|
||||||
|
|
||||||
|
# Start training only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
@ -0,0 +1,47 @@
|
|||||||
|
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
|
||||||
|
__all__ = ['vgg_bn_drop']
|
||||||
|
|
||||||
|
|
||||||
|
def vgg_bn_drop(input):
    """Build a VGG-style network with batch norm and dropout.

    Five convolution groups (64, 128, 256, 512 and 512 filters) are stacked
    on ``input``; the first group is created with 3 input channels. They
    are followed by dropout, a 512-unit linear fc layer, a batch-norm layer
    with ReLU and a drop rate of 0.5, and a second 512-unit linear fc layer.

    Returns:
        The last fully connected layer.
    """

    def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
        # One group of `groups` 3x3 conv+BN layers followed by 2x2 max
        # pooling; `dropouts` holds one drop rate per convolution.
        return paddle.networks.img_conv_group(
            input=ipt,
            num_channels=num_channels,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act=paddle.activation.Relu(),
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_size=2,
            pool_stride=2,
            pool_type=paddle.pooling.Max())

    # (num_filter, groups, dropouts, num_channels) for each stage, in order.
    stage_specs = [
        (64, 2, [0.3, 0], 3),
        (128, 2, [0.4, 0], None),
        (256, 3, [0.4, 0.4, 0], None),
        (512, 3, [0.4, 0.4, 0], None),
        (512, 3, [0.4, 0.4, 0], None),
    ]
    features = input
    for num_filter, groups, dropouts, num_channels in stage_specs:
        features = conv_block(features, num_filter, groups, dropouts,
                              num_channels)

    dropped = paddle.layer.dropout(input=features, dropout_rate=0.5)
    hidden = paddle.layer.fc(
        input=dropped, size=512, act=paddle.activation.Linear())
    normalized = paddle.layer.batch_norm(
        input=hidden,
        act=paddle.activation.Relu(),
        layer_attr=paddle.attr.Extra(drop_rate=0.5))
    return paddle.layer.fc(
        input=normalized, size=512, act=paddle.activation.Linear())
|
@ -0,0 +1,58 @@
|
|||||||
|
import paddle.v2 as paddle
|
||||||
|
import paddle.v2.dataset.uci_housing as uci_housing
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Train a single-output linear model on the UCI housing dataset.

    Runs on CPU for 30 passes with batches of 2 samples. The training cost
    is printed every 100 batches and the test cost after every 10th pass.
    """
    # init
    paddle.init(use_gpu=False, trainer_count=1)

    # network config
    x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
    y_predict = paddle.layer.fc(input=x,
                                param_attr=paddle.attr.Param(name='w'),
                                size=1,
                                act=paddle.activation.Linear(),
                                bias_attr=paddle.attr.Param(name='b'))
    y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
    cost = paddle.layer.regression_cost(input=y_predict, label=y)

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer
    optimizer = paddle.optimizer.Momentum(momentum=0)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer)

    # Position of each data layer's value inside a reader sample.
    feeding = {'x': 0, 'y': 1}

    # event_handler to print training and testing info
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                # Parenthesised single-argument print behaves the same as a
                # Python 2 print statement and is valid Python 3 syntax.
                print("Pass %d, Batch %d, Cost %f" % (
                    event.pass_id, event.batch_id, event.cost))

        if isinstance(event, paddle.event.EndPass):
            if (event.pass_id + 1) % 10 == 0:
                result = trainer.test(
                    reader=paddle.batch(
                        uci_housing.test(), batch_size=2),
                    feeding=feeding)
                print("Test %d, %.2f" % (event.pass_id, result.cost))

    # training
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                uci_housing.train(), buf_size=500),
            batch_size=2),
        feeding=feeding,
        event_handler=event_handler,
        num_passes=30)
|
||||||
|
|
||||||
|
|
||||||
|
# Start training only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
@ -0,0 +1,125 @@
|
|||||||
|
import paddle.v2 as paddle
|
||||||
|
import cPickle
|
||||||
|
import copy
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Train a MovieLens rating model and print one sample prediction.

    User features (id, gender, age, job) and movie features (id,
    categories, title) are each combined into a 200-unit tanh layer; the
    scaled cosine similarity of the two is regressed against the score.
    After one training pass the model is queried for user 234 / movie 345
    and the prediction is printed after applying ``(p + 5) / 2``.
    """
    paddle.init(use_gpu=False)
    movielens = paddle.dataset.movielens
    movie_title_dict = movielens.get_movie_title_dict()

    # ---- user side ----
    uid = paddle.layer.data(
        name='user_id',
        type=paddle.data_type.integer_value(movielens.max_user_id() + 1))
    usr_emb = paddle.layer.embedding(input=uid, size=32)

    usr_gender_id = paddle.layer.data(
        name='gender_id', type=paddle.data_type.integer_value(2))
    usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)

    usr_age_id = paddle.layer.data(
        name='age_id',
        type=paddle.data_type.integer_value(len(movielens.age_table)))
    usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)

    usr_job_id = paddle.layer.data(
        name='job_id',
        type=paddle.data_type.integer_value(movielens.max_job_id() + 1))
    usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)

    usr_combined_features = paddle.layer.fc(
        input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
        size=200,
        act=paddle.activation.Tanh())

    # ---- movie side ----
    mov_id = paddle.layer.data(
        name='movie_id',
        type=paddle.data_type.integer_value(movielens.max_movie_id() + 1))
    mov_emb = paddle.layer.embedding(input=mov_id, size=32)

    mov_categories = paddle.layer.data(
        name='category_id',
        type=paddle.data_type.sparse_binary_vector(
            len(movielens.movie_categories())))
    mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)

    mov_title_id = paddle.layer.data(
        name='movie_title',
        type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
    mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
    mov_title_conv = paddle.networks.sequence_conv_pool(
        input=mov_title_emb, hidden_size=32, context_len=3)

    mov_combined_features = paddle.layer.fc(
        input=[mov_emb, mov_categories_hidden, mov_title_conv],
        size=200,
        act=paddle.activation.Tanh())

    # ---- rating prediction and cost ----
    inference = paddle.layer.cos_sim(
        a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
    cost = paddle.layer.regression_cost(
        input=inference,
        label=paddle.layer.data(
            name='score', type=paddle.data_type.dense_vector(1)))

    parameters = paddle.parameters.create(cost)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=paddle.optimizer.Adam(
                                     learning_rate=1e-4))
    # Position of each data layer's value inside a reader sample.
    feeding = {
        'user_id': 0,
        'gender_id': 1,
        'age_id': 2,
        'job_id': 3,
        'movie_id': 4,
        'category_id': 5,
        'movie_title': 6,
        'score': 7
    }

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                # Parenthesised single-argument print behaves the same as a
                # Python 2 print statement and is valid Python 3 syntax.
                print("Pass %d Batch %d Cost %.2f" % (
                    event.pass_id, event.batch_id, event.cost))

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                movielens.train(), buf_size=8192),
            batch_size=256),
        event_handler=event_handler,
        feeding=feeding,
        num_passes=1)

    # ---- single-sample inference ----
    user_id = 234
    movie_id = 345

    user = movielens.user_info()[user_id]
    movie = movielens.movie_info()[movie_id]

    feature = user.value() + movie.value()

    def reader():
        yield feature

    # Inference has no label, so drop the score column from the feeding map.
    infer_dict = dict(feeding)
    del infer_dict['score']

    prediction = paddle.infer(
        output=inference,
        parameters=parameters,
        reader=paddle.batch(
            reader, batch_size=32),
        feeding=infer_dict)
    # The original `print(prediction + 5) / 2` only printed the rescaled
    # value because it was parsed as a Python 2 print statement; the extra
    # parentheses make the intended expression explicit on any version.
    print((prediction + 5) / 2)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
@ -0,0 +1,190 @@
|
|||||||
|
import sys
|
||||||
|
import math
|
||||||
|
import numpy as np
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
import paddle.v2.dataset.conll05 as conll05
|
||||||
|
|
||||||
|
|
||||||
|
def db_lstm():
    """Build the stacked LSTM + CRF network for the CoNLL-05 data.

    Eight integer-sequence inputs (word, five context inputs, predicate and
    mark) are embedded and mixed into a 512-unit hidden layer. That layer
    feeds a stack of ``depth`` LSTM layers wired with direct edges, where
    every odd-numbered layer of the loop runs in reverse. The final mixed
    layer is scored by a CRF against the ``target`` input.

    Returns:
        A ``(crf_cost, crf_dec)`` tuple: the CRF cost layer and the CRF
        decoding layer, both using the parameter named ``'crfw'``.
    """
    word_dict, verb_dict, label_dict = conll05.get_dict()
    word_dict_len = len(word_dict)
    label_dict_len = len(label_dict)
    pred_len = len(verb_dict)

    mark_dict_len = 2
    word_dim = 32
    mark_dim = 5
    hidden_dim = 512
    depth = 8

    def seq_input(name, size):
        # Every input of this network is a sequence of integer ids.
        return paddle.layer.data(
            name=name, type=paddle.data_type.integer_value_sequence(size))

    # 8 features
    word = seq_input('word_data', word_dict_len)
    predicate = seq_input('verb_data', pred_len)
    contexts = [
        seq_input(ctx_name, word_dict_len)
        for ctx_name in ('ctx_n2_data', 'ctx_n1_data', 'ctx_0_data',
                         'ctx_p1_data', 'ctx_p2_data')
    ]
    mark = seq_input('mark_data', mark_dict_len)

    target = seq_input('target', label_dict_len)

    default_std = 1 / math.sqrt(hidden_dim) / 3.0

    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)

    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(
            name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        size=mark_dim, input=mark, param_attr=std_0)

    # The word and context inputs all share the 'emb' embedding parameter.
    emb_layers = []
    for word_layer in [word] + contexts:
        emb_layers.append(
            paddle.layer.embedding(
                size=word_dim, input=word_layer, param_attr=emb_para))
    emb_layers += [predicate_embedding, mark_embedding]

    hidden_0 = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=std_default) for emb in emb_layers
        ])

    mix_hidden_lr = 1e-3
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)

    def direct_edges(hidden_layer, lstm_layer):
        # Projections of the previous mixed layer and the previous LSTM
        # layer, in that order.
        return [
            paddle.layer.full_matrix_projection(
                input=hidden_layer, param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=lstm_layer, param_attr=lstm_para_attr)
        ]

    prev_hidden = hidden_0
    prev_lstm = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=std_0,
        param_attr=lstm_para_attr)

    #stack L-LSTM and R-LSTM with direct edges
    for level in range(1, depth):
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=std_default,
            input=direct_edges(prev_hidden, prev_lstm))

        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            reverse=(level % 2 == 1),
            bias_attr=std_0,
            param_attr=lstm_para_attr)

        prev_hidden, prev_lstm = mix_hidden, lstm

    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=std_default,
        input=direct_edges(prev_hidden, prev_lstm))

    crf_cost = paddle.layer.crf(
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(
            name='crfw',
            initial_std=default_std,
            learning_rate=mix_hidden_lr))

    crf_dec = paddle.layer.crf_decoding(
        name='crf_dec_l',
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))

    return crf_cost, crf_dec
|
||||||
|
|
||||||
|
|
||||||
|
def load_parameter(file_name, h, w):
    """Read a float32 matrix from a binary parameter file.

    The first 16 bytes of the file are skipped as a header; everything
    after them is read as float32 values and reshaped to ``(h, w)``.

    Args:
        file_name: path of the binary file.
        h: number of rows of the result.
        w: number of columns of the result.

    Returns:
        A ``numpy`` array of dtype float32 and shape ``(h, w)``.
    """
    header_bytes = 16
    with open(file_name, 'rb') as param_file:
        param_file.read(header_bytes)  # skip header.
        values = np.fromfile(param_file, dtype=np.float32)
        return values.reshape(h, w)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Train the ``db_lstm`` network on the CoNLL-05 data.

    Runs on CPU with a single trainer, loads the pre-trained word embedding
    into the ``'emb'`` parameter and prints the cost and metrics every 100
    batches.
    """
    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology
    crf_cost, crf_dec = db_lstm()

    # create parameters
    parameters = paddle.parameters.create([crf_cost, crf_dec])

    # create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0,
        learning_rate=2e-2,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(
            average_window=0.5, max_average_window=10000), )

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                # Parenthesised single-argument print behaves the same as a
                # Python 2 print statement and is valid Python 3 syntax.
                print("Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics))

    trainer = paddle.trainer.SGD(cost=crf_cost,
                                 parameters=parameters,
                                 update_equation=optimizer)
    # The embedding file is read as a 44068 x 32 float32 matrix.
    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

    # NOTE(review): the training reader is built from conll05.test();
    # confirm this is intended.
    trn_reader = paddle.batch(
        paddle.reader.shuffle(
            conll05.test(), buf_size=8192), batch_size=10)

    # Position of each data layer's value inside a reader sample.
    feeding = {
        'word_data': 0,
        'ctx_n2_data': 1,
        'ctx_n1_data': 2,
        'ctx_0_data': 3,
        'ctx_p1_data': 4,
        'ctx_p2_data': 5,
        'verb_data': 6,
        'mark_data': 7,
        'target': 8
    }

    trainer.train(
        reader=trn_reader,
        event_handler=event_handler,
        num_passes=10000,
        feeding=feeding)
|
||||||
|
|
||||||
|
|
||||||
|
# Start training only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
@ -0,0 +1,159 @@
|
|||||||
|
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
|
||||||
|
|
||||||
|
def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
    """Build a text-classification cost using two sequence convolutions.

    The ``"word"`` input is embedded, passed through two
    ``sequence_conv_pool`` branches (context lengths 3 and 4) and classified
    with a softmax fc layer against the ``"label"`` input.

    Args:
        input_dim: size of the word dictionary.
        class_dim: number of categories; also sizes the label input.
        emb_dim: dimension of the word embedding.
        hid_dim: hidden size of each convolution branch.

    Returns:
        The classification cost layer.
    """
    data = paddle.layer.data("word",
                             paddle.data_type.integer_value_sequence(input_dim))
    emb = paddle.layer.embedding(input=data, size=emb_dim)
    conv_3 = paddle.networks.sequence_conv_pool(
        input=emb, context_len=3, hidden_size=hid_dim)
    conv_4 = paddle.networks.sequence_conv_pool(
        input=emb, context_len=4, hidden_size=hid_dim)
    output = paddle.layer.fc(input=[conv_3, conv_4],
                             size=class_dim,
                             act=paddle.activation.Softmax())
    # The label range must follow class_dim; it used to be hard-coded to 2,
    # which disagreed with the softmax size for any other class count.
    lbl = paddle.layer.data("label",
                            paddle.data_type.integer_value(class_dim))
    cost = paddle.layer.classification_cost(input=output, label=lbl)
    return cost
|
||||||
|
|
||||||
|
|
||||||
|
def stacked_lstm_net(input_dim,
                     class_dim=2,
                     emb_dim=128,
                     hid_dim=512,
                     stacked_num=3):
    """
    A wrapper for the sentiment classification task.
    This network stacks fc + LSTM layers whose direction alternates from
    one layer to the next (three LSTM layers by default). The configuration
    follows the paper at the URL below, but uses fewer layers.
        http://www.aclweb.org/anthology/P15-1109

    input_dim: here is word dictionary dimension.
    class_dim: number of categories; also sizes the label input.
    emb_dim: dimension of word embedding.
    hid_dim: dimension of hidden layer.
    stacked_num: number of stacked lstm-hidden layer, must be odd.

    Returns the classification cost layer.
    """
    assert stacked_num % 2 == 1

    layer_attr = paddle.attr.Extra(drop_rate=0.5)
    fc_para_attr = paddle.attr.Param(learning_rate=1e-3)
    lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.)
    # One attribute per input of the two-input fc layers: [fc, lstm].
    para_attr = [fc_para_attr, lstm_para_attr]
    bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.)
    relu = paddle.activation.Relu()
    linear = paddle.activation.Linear()

    data = paddle.layer.data("word",
                             paddle.data_type.integer_value_sequence(input_dim))
    emb = paddle.layer.embedding(input=data, size=emb_dim)

    fc1 = paddle.layer.fc(input=emb,
                          size=hid_dim,
                          act=linear,
                          bias_attr=bias_attr)
    lstm1 = paddle.layer.lstmemory(
        input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)

    inputs = [fc1, lstm1]
    for i in range(2, stacked_num + 1):
        fc = paddle.layer.fc(input=inputs,
                             size=hid_dim,
                             act=linear,
                             param_attr=para_attr,
                             bias_attr=bias_attr)
        lstm = paddle.layer.lstmemory(
            input=fc,
            # Even-numbered layers read the sequence in reverse.
            reverse=(i % 2) == 0,
            act=relu,
            bias_attr=bias_attr,
            layer_attr=layer_attr)
        inputs = [fc, lstm]

    fc_last = paddle.layer.pooling(
        input=inputs[0], pooling_type=paddle.pooling.Max())
    lstm_last = paddle.layer.pooling(
        input=inputs[1], pooling_type=paddle.pooling.Max())
    output = paddle.layer.fc(input=[fc_last, lstm_last],
                             size=class_dim,
                             act=paddle.activation.Softmax(),
                             bias_attr=bias_attr,
                             param_attr=para_attr)

    # The label range must follow class_dim; it used to be hard-coded to 2,
    # which disagreed with the softmax size for any other class count.
    lbl = paddle.layer.data("label",
                            paddle.data_type.integer_value(class_dim))
    cost = paddle.layer.classification_cost(input=output, label=lbl)
    return cost
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: train a sentiment classifier on the IMDB dataset for
# two passes on CPU, printing the cost every 100 batches and the test
# metrics after every pass.
if __name__ == '__main__':
    # init
    paddle.init(use_gpu=False)

    #data
    # Parenthesised single-argument print behaves the same as a Python 2
    # print statement and is valid Python 3 syntax.
    print('load dictionary...')
    word_dict = paddle.dataset.imdb.word_dict()
    dict_dim = len(word_dict)
    class_dim = 2
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=100)
    test_reader = paddle.batch(
        lambda: paddle.dataset.imdb.test(word_dict), batch_size=100)

    # Position of each data layer's value inside a reader sample.
    feeding = {'word': 0, 'label': 1}

    # network config
    # Please choose the way to build the network
    # by uncommenting the corresponding line.
    cost = convolution_net(dict_dim, class_dim=class_dim)
    # cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=2e-3,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(average_window=0.5))

    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print("\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            # `trainer` is bound below, before any event can fire.
            result = trainer.test(reader=test_reader, feeding=feeding)
            print("\nTest with Pass %d, %s" % (event.pass_id, result.metrics))

    # create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=adam_optimizer)

    trainer.train(
        reader=train_reader,
        event_handler=event_handler,
        feeding=feeding,
        num_passes=2)
|
@ -0,0 +1,146 @@
|
|||||||
|
import sys
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
|
||||||
|
|
||||||
|
def seqToseq_net(source_dict_dim, target_dict_dim):
    """Build the attention-based sequence-to-sequence training network.

    The encoder runs a forward and a reversed ``simple_gru`` over the source
    word embedding and concatenates both outputs.  The decoder is a
    ``recurrent_group`` whose step combines ``simple_attention`` with a
    ``gru_step`` and ends in a softmax over the target dictionary.

    Three data layers are declared, all integer-id sequences:
    ``source_language_word``, ``target_language_word`` and
    ``target_language_next_word``.

    :param source_dict_dim: size of the source-language dictionary.
    :param target_dict_dim: size of the target-language dictionary.
    :return: the classification cost layer comparing the decoder output with
        the ``target_language_next_word`` labels.
    """
    # Layer widths used throughout the network.
    emb_dim = 512  # word embedding vector
    dec_dim = 512  # hidden unit of the GRU decoder
    enc_dim = 512  # hidden unit of the GRU encoder

    # ---- Encoder ----
    source_words = paddle.layer.data(
        name='source_language_word',
        type=paddle.data_type.integer_value_sequence(source_dict_dim))
    source_emb = paddle.layer.embedding(
        input=source_words,
        size=emb_dim,
        param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
    fwd_states = paddle.networks.simple_gru(input=source_emb, size=enc_dim)
    bwd_states = paddle.networks.simple_gru(
        input=source_emb, size=enc_dim, reverse=True)
    encoded_vector = paddle.layer.concat(input=[fwd_states, bwd_states])

    # ---- Decoder ----
    # Projection of the encoder output to the decoder width; it is handed to
    # the attention helper inside the decoder step.
    with paddle.layer.mixed(size=dec_dim) as encoded_proj:
        encoded_proj += paddle.layer.full_matrix_projection(
            input=encoded_vector)

    # The first element of the reversed-GRU output, passed through a tanh
    # projection, boots the decoder memory.
    bwd_first = paddle.layer.first_seq(input=bwd_states)
    with paddle.layer.mixed(
            size=dec_dim, act=paddle.activation.Tanh()) as decoder_boot:
        decoder_boot += paddle.layer.full_matrix_projection(input=bwd_first)

    def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
        """Run one decoder step and return the softmax over target words."""
        # The memory carries the same name ('gru_decoder') as the gru_step
        # layer below -- presumably that is what feeds the previous step's
        # state back in; confirm against the Paddle memory documentation.
        prev_state = paddle.layer.memory(
            name='gru_decoder', size=dec_dim, boot_layer=decoder_boot)

        attended = paddle.networks.simple_attention(
            encoded_sequence=enc_vec,
            encoded_proj=enc_proj,
            decoder_state=prev_state)

        # NOTE(review): width is three times the decoder size, which looks
        # like the input layout gru_step expects -- confirm.
        with paddle.layer.mixed(size=dec_dim * 3) as step_input:
            step_input += paddle.layer.full_matrix_projection(input=attended)
            step_input += paddle.layer.full_matrix_projection(
                input=current_word)

        new_state = paddle.layer.gru_step(
            name='gru_decoder',
            input=step_input,
            output_mem=prev_state,
            size=dec_dim)

        with paddle.layer.mixed(
                size=target_dict_dim,
                bias_attr=True,
                act=paddle.activation.Softmax()) as word_probs:
            word_probs += paddle.layer.full_matrix_projection(input=new_state)
        return word_probs

    static_encoded = paddle.layer.StaticInputV2(
        input=encoded_vector, is_seq=True)
    static_proj = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)

    target_words = paddle.layer.data(
        name='target_language_word',
        type=paddle.data_type.integer_value_sequence(target_dict_dim))
    target_emb = paddle.layer.embedding(
        input=target_words,
        size=emb_dim,
        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))

    # For a decoder equipped with an attention mechanism, during training the
    # target embedding (the ground truth) is the data input, while the encoded
    # source sequence is accessed as an unbounded memory.  The StaticInput
    # wrappers define that read-only memory for the recurrent_group.
    decoder = paddle.layer.recurrent_group(
        name="decoder_group",
        step=gru_decoder_with_attention,
        input=[static_encoded, static_proj, target_emb])

    next_words = paddle.layer.data(
        name='target_language_next_word',
        type=paddle.data_type.integer_value_sequence(target_dict_dim))
    return paddle.layer.classification_cost(input=decoder, label=next_words)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Train the seq2seq network on the WMT14 dataset on CPU.

    Initialises Paddle with a single CPU trainer, builds the cost layer with
    ``seqToseq_net`` using a 30000-word dictionary for both languages, and
    trains it with Adam plus L2 regularisation.  Progress is reported on
    stdout: a full status line every 10th batch and a dot otherwise.
    """
    paddle.init(use_gpu=False, trainer_count=1)

    # Source and target dictionaries share one size.
    dict_size = 30000
    source_dict_dim = target_dict_dim = dict_size

    # Define the network topology.
    cost = seqToseq_net(source_dict_dim, target_dict_dim)
    parameters = paddle.parameters.create(cost)

    # Define the optimisation method and the trainer.
    optimizer = paddle.optimizer.Adam(
        learning_rate=5e-5,
        regularization=paddle.optimizer.L2Regularization(rate=1e-3))
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer)

    # Map each data-layer name to its position in a reader sample.
    feeding = {
        'source_language_word': 0,
        'target_language_word': 1,
        'target_language_next_word': 2
    }

    wmt14_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192),
        batch_size=5)

    def event_handler(event):
        """Report training progress at the end of every iteration."""
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 10 == 0:
                # Parenthesised single-argument form: prints the same text
                # under the Python 2 print statement and the Python 3
                # print function.
                print("\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()

    # Start training.
    trainer.train(
        reader=wmt14_reader,
        event_handler=event_handler,
        num_passes=10000,
        feeding=feeding)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the training entry point only when executed as a script, not on import.
if __name__ == '__main__':
|
||||||
|
main()
|
@ -0,0 +1,80 @@
|
|||||||
|
import math
|
||||||
|
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
|
||||||
|
# NOTE(review): not referenced anywhere in the visible code -- main() derives
# the dictionary size from the built word dictionary instead; confirm whether
# this constant is still needed.
dictsize = 1953
|
||||||
|
# Output size of each word projection created by wordemb(); also used to
# scale the initial std of the hidden layer's weights in main().
embsize = 32
|
||||||
|
# Size of the sigmoid hidden layer built in main().
hiddensize = 256
|
||||||
|
# Second argument passed to the imikolov train/test readers; presumably the
# n-gram window (four context words plus the predicted word) -- confirm
# against the dataset documentation.
N = 5
|
||||||
|
|
||||||
|
|
||||||
|
def wordemb(inlayer):
    """Return a table projection of ``inlayer`` of size ``embsize``.

    The projection parameter is always named ``"_proj"``, so every call uses
    the same parameter name (presumably sharing one embedding table across
    all context words -- confirm against Paddle's parameter-naming rules).

    :param inlayer: the input layer to project.
    :return: the ``table_projection`` built on ``inlayer``.
    """
    shared_table_attr = paddle.attr.Param(
        name="_proj",
        initial_std=0.001,
        learning_rate=1,
        l2_rate=0)
    return paddle.layer.table_projection(
        input=inlayer, size=embsize, param_attr=shared_table_attr)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Train an n-gram next-word model on the imikolov dataset on CPU.

    Four context words are each projected by ``wordemb``, concatenated, fed
    through a sigmoid hidden layer with dropout and a softmax output layer
    sized to the dictionary.  Training runs for 30 passes with Adam plus L2
    regularisation; every 100th batch the model is evaluated on the test
    reader and a status line is printed.
    """
    paddle.init(use_gpu=False, trainer_count=1)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)

    # Data layers.  No explicit feeding is passed to the trainer below, so
    # their declaration order is left untouched.
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())

    def event_handler(event):
        """Evaluate on the test reader and report every 100th batch."""
        # `trainer` is assigned further down in main(); the closure only
        # looks it up when the handler is called during training.
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                result = trainer.test(
                    paddle.batch(
                        paddle.dataset.imikolov.test(word_dict, N), 32))
                # Parenthesised single-argument form: prints the same text
                # under the Python 2 print statement and the Python 3
                # print function.
                print("Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics))

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
    trainer.train(
        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
        num_passes=30,
        event_handler=event_handler)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the training entry point only when executed as a script, not on import.
if __name__ == '__main__':
|
||||||
|
main()
|
@ -1,37 +1,26 @@
|
|||||||
API中文手册
|
API
|
||||||
============
|
===
|
||||||
|
|
||||||
DataProvider API
|
模型配置 API
|
||||||
----------------
|
------------
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
|
|
||||||
data_provider/dataprovider_cn.rst
|
v2/model_configs.rst
|
||||||
data_provider/pydataprovider2_cn.rst
|
|
||||||
|
|
||||||
.. _api_trainer_config:
|
数据 API
|
||||||
|
--------
|
||||||
Model Config API
|
|
||||||
----------------
|
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
|
|
||||||
trainer_config_helpers/optimizers.rst
|
v2/data.rst
|
||||||
trainer_config_helpers/data_sources.rst
|
|
||||||
trainer_config_helpers/layers.rst
|
|
||||||
trainer_config_helpers/activations.rst
|
|
||||||
trainer_config_helpers/poolings.rst
|
|
||||||
trainer_config_helpers/networks.rst
|
|
||||||
trainer_config_helpers/evaluators.rst
|
|
||||||
trainer_config_helpers/attrs.rst
|
|
||||||
|
|
||||||
|
|
||||||
Applications API
|
训练 API
|
||||||
----------------
|
--------
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
|
|
||||||
predict/swig_py_paddle_cn.rst
|
v2/run_logic.rst
|
@ -1,37 +1,26 @@
|
|||||||
API
|
API
|
||||||
===
|
===
|
||||||
|
|
||||||
DataProvider API
|
Model Config API
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
|
|
||||||
data_provider/dataprovider_en.rst
|
v2/model_configs.rst
|
||||||
data_provider/pydataprovider2_en.rst
|
|
||||||
|
|
||||||
.. _api_trainer_config:
|
|
||||||
|
|
||||||
Model Config API
|
Data API
|
||||||
----------------
|
--------
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
|
|
||||||
trainer_config_helpers/optimizers.rst
|
v2/data.rst
|
||||||
trainer_config_helpers/data_sources.rst
|
|
||||||
trainer_config_helpers/layers.rst
|
|
||||||
trainer_config_helpers/activations.rst
|
|
||||||
trainer_config_helpers/poolings.rst
|
|
||||||
trainer_config_helpers/networks.rst
|
|
||||||
trainer_config_helpers/evaluators.rst
|
|
||||||
trainer_config_helpers/attrs.rst
|
|
||||||
|
|
||||||
|
Train API
|
||||||
|
---------
|
||||||
|
|
||||||
Applications API
|
.. toctree::
|
||||||
----------------
|
:maxdepth: 1
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
:maxdepth: 1
|
|
||||||
|
|
||||||
predict/swig_py_paddle_en.rst
|
v2/run_logic.rst
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue