Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop

0.10.0rc
Yancey1989 8 years ago
commit 047f3a766c

.gitmodules

@@ -0,0 +1,3 @@
[submodule "book"]
path = book
url = https://github.com/PaddlePaddle/book.git

@@ -2,12 +2,12 @@
sha: c25201a00e6b0514370501050cf2a8538ac12270
hooks:
- id: remove-crlf
files: (?!.*third_party)^.*$
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- repo: https://github.com/reyoung/mirrors-yapf.git
sha: v0.13.2
hooks:
- id: yapf
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ # Bazel BUILD files follow Python syntax.
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
hooks:
@@ -15,7 +15,7 @@
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
files: (?!.*third_party)^.*$
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- id: end-of-file-fixer
- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
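The two amended `files:` patterns above aim to skip anything under `third_party` or the new `book` submodule. For illustration only (this is not what the diff commits), the same intent can be written as a single lookahead and checked with Python's `re` module:

```python
import re

# Illustrative only: one combined lookahead expressing the exclusion intent
# of the `files:` patterns above (skip third_party/ and book/ paths).
pattern = re.compile(r'(?!.*(third_party|book))^.*$')

assert pattern.match('paddle/gserver/gradientmachines/MultiGradientMachine.cpp')
assert not pattern.match('third_party/protobuf/src/message.cc')  # excluded
assert not pattern.match('book/README.md')                       # excluded
```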

@@ -29,13 +29,16 @@ Luo, Tao
Lyu, Qin
Mao, Hongyue
Qian, Xiaojun
Qiao, Longfei
Qi, Jun
Qin, Duohao
Shen, Guolong
Shi, Guangchuan
Song, Xiang
Wang, Helin
Wang, Jiang
Wang, Yanfei
Wang, Yi
Wang, Yong
Weng, Renliang
Xu, Tianbing

@@ -0,0 +1 @@
Subproject commit 22ed2a01aee872f055b5f5f212428f481cefc10d

@@ -14,7 +14,7 @@
INCLUDE(ExternalProject)
FIND_PACKAGE(Protobuf)
FIND_PACKAGE(Protobuf 3.1)
IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)

@@ -13,9 +13,10 @@
# limitations under the License
import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
from api_v2_resnet import resnet_cifar10
def main():
@@ -23,16 +24,16 @@ def main():
classdim = 10
# PaddlePaddle init
paddle.init(use_gpu=True, trainer_count=1)
paddle.init(use_gpu=False, trainer_count=1)
image = paddle.layer.data(
name="image", type=paddle.data_type.dense_vector(datadim))
# Add neural network config
# option 1. resnet
net = resnet_cifar10(image, depth=32)
# net = resnet_cifar10(image, depth=32)
# option 2. vgg
# net = vgg_bn_drop(image)
net = vgg_bn_drop(image)
out = paddle.layer.fc(input=net,
size=classdim,
@@ -68,8 +69,8 @@ def main():
result = trainer.test(
reader=paddle.batch(
paddle.dataset.cifar.test10(), batch_size=128),
reader_dict={'image': 0,
'label': 1})
feeding={'image': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# Create trainer
@@ -83,8 +84,8 @@ def main():
batch_size=128),
num_passes=5,
event_handler=event_handler,
reader_dict={'image': 0,
'label': 1})
feeding={'image': 0,
'label': 1})
if __name__ == '__main__':
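Throughout this commit, `reader_dict` is renamed to `feeding`. The argument maps each data layer's name to that layer's position in the tuples a reader yields; a sketch of the contract, assuming CIFAR-10 samples are `(image, label)` tuples as the dataset module yields them:

```python
# Each sample from the CIFAR-10 reader is a tuple: position 0 holds the
# flattened image, position 1 the integer label. `feeding` routes tuple
# positions to the data layers declared above by name.
feeding = {'image': 0, 'label': 1}

sample = next(paddle.dataset.cifar.test10()())
image_vec, label = sample[feeding['image']], sample[feeding['label']]
```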

@@ -30,26 +30,26 @@ def main():
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost)
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.reader.batched(
uci_housing.test(), batch_size=2),
reader_dict={'x': 0,
if (event.pass_id + 1) % 10 == 0:
result = trainer.test(
reader=paddle.batch(
uci_housing.test(), batch_size=2),
feeding={'x': 0,
'y': 1})
if event.pass_id % 10 == 0:
print "Test %d, %s" % (event.pass_id, result.metrics)
print "Test %d, %.2f" % (event.pass_id, result.cost)
# training
trainer.train(
reader=paddle.reader.batched(
reader=paddle.batch(
paddle.reader.shuffle(
uci_housing.train(), buf_size=500),
batch_size=2),
reader_dict={'x': 0,
'y': 1},
feeding={'x': 0,
'y': 1},
event_handler=event_handler,
num_passes=30)

@@ -5,3 +5,6 @@ plot.png
train.log
*pyc
.ipynb_checkpoints
params.pkl
params.tar
params.tar.gz

@@ -1,4 +1,5 @@
import paddle.v2 as paddle
import gzip
def softmax_regression(img):
@@ -71,7 +72,11 @@ def main():
cost = paddle.layer.classification_cost(input=predict, label=label)
parameters = paddle.parameters.create(cost)
try:
with gzip.open('params.tar.gz', 'r') as f:
parameters = paddle.parameters.Parameters.from_tar(f)
except IOError:
parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Momentum(
learning_rate=0.1 / 128.0,
@@ -86,11 +91,15 @@ def main():
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
if event.batch_id % 1000 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=paddle.reader.batched(
with gzip.open('params.tar.gz', 'w') as f:
parameters.to_tar(f)
elif isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=paddle.batch(
paddle.dataset.mnist.test(), batch_size=128))
print "Test with Pass %d, Cost %f, %s\n" % (
event.pass_id, result.cost, result.metrics)
@@ -110,17 +119,16 @@ def main():
print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
test_creator = paddle.dataset.mnist.test()
test_data = []
for item in test_creator():
test_data.append(item[0])
if len(test_data) == 100:
break
# output is a softmax layer. It returns probabilities.
# Shape should be (100, 10)
probs = paddle.infer(
output=predict,
parameters=parameters,
reader=paddle.batch(
paddle.reader.firstn(
paddle.reader.map_readers(lambda item: (item[0], ),
paddle.dataset.mnist.test()),
n=100),
batch_size=32))
probs = paddle.infer(output=predict, parameters=parameters, input=test_data)
print probs.shape
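The hunks above add simple checkpointing: parameters are restored from `params.tar.gz` when present and re-saved at the end of every pass. The round trip in isolation, as a minimal sketch using only calls that appear in this diff:

```python
import gzip

import paddle.v2 as paddle

# Minimal sketch of the checkpoint round trip (assumes `cost` is the
# classification cost defined in the surrounding script).
try:
    with gzip.open('params.tar.gz', 'r') as f:      # resume if a snapshot exists
        parameters = paddle.parameters.Parameters.from_tar(f)
except IOError:
    parameters = paddle.parameters.create(cost)     # otherwise start fresh

with gzip.open('params.tar.gz', 'w') as f:          # snapshot after a pass
    parameters.to_tar(f)
```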

@@ -0,0 +1,125 @@
import paddle.v2 as paddle
import cPickle
import copy


def main():
    paddle.init(use_gpu=False)
    movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
    uid = paddle.layer.data(
        name='user_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_user_id() + 1))
    usr_emb = paddle.layer.embedding(input=uid, size=32)

    usr_gender_id = paddle.layer.data(
        name='gender_id', type=paddle.data_type.integer_value(2))
    usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)

    usr_age_id = paddle.layer.data(
        name='age_id',
        type=paddle.data_type.integer_value(
            len(paddle.dataset.movielens.age_table)))
    usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)

    usr_job_id = paddle.layer.data(
        name='job_id',
        type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
        ) + 1))
    usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)

    usr_combined_features = paddle.layer.fc(
        input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
        size=200,
        act=paddle.activation.Tanh())

    mov_id = paddle.layer.data(
        name='movie_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_movie_id() + 1))
    mov_emb = paddle.layer.embedding(input=mov_id, size=32)

    mov_categories = paddle.layer.data(
        name='category_id',
        type=paddle.data_type.sparse_binary_vector(
            len(paddle.dataset.movielens.movie_categories())))
    mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)

    mov_title_id = paddle.layer.data(
        name='movie_title',
        type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
    mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
    mov_title_conv = paddle.networks.sequence_conv_pool(
        input=mov_title_emb, hidden_size=32, context_len=3)

    mov_combined_features = paddle.layer.fc(
        input=[mov_emb, mov_categories_hidden, mov_title_conv],
        size=200,
        act=paddle.activation.Tanh())

    inference = paddle.layer.cos_sim(
        a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
    cost = paddle.layer.regression_cost(
        input=inference,
        label=paddle.layer.data(
            name='score', type=paddle.data_type.dense_vector(1)))

    parameters = paddle.parameters.create(cost)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=paddle.optimizer.Adam(
                                     learning_rate=1e-4))
    feeding = {
        'user_id': 0,
        'gender_id': 1,
        'age_id': 2,
        'job_id': 3,
        'movie_id': 4,
        'category_id': 5,
        'movie_title': 6,
        'score': 7
    }

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d Batch %d Cost %.2f" % (
                    event.pass_id, event.batch_id, event.cost)

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.movielens.train(), buf_size=8192),
            batch_size=256),
        event_handler=event_handler,
        feeding=feeding,
        num_passes=1)

    user_id = 234
    movie_id = 345
    user = paddle.dataset.movielens.user_info()[user_id]
    movie = paddle.dataset.movielens.movie_info()[movie_id]
    feature = user.value() + movie.value()

    def reader():
        yield feature

    infer_dict = copy.copy(feeding)
    del infer_dict['score']
    prediction = paddle.infer(
        output=inference,
        parameters=parameters,
        reader=paddle.batch(
            reader, batch_size=32),
        feeding=infer_dict)
    print(prediction + 5) / 2


if __name__ == '__main__':
    main()
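The final `print` deserves a note: `cos_sim` above is built with `scale=5`, so raw predictions fall in [-5, 5], and `(prediction + 5) / 2` shifts and rescales that interval onto [0, 5] to read as a star rating. A worked check, illustrative only:

```python
import numpy

# cos_sim(scale=5) ranges over [-5, 5]; shift by 5 and halve to land in [0, 5].
raw = numpy.array([[-5.0], [0.0], [3.0], [5.0]])   # hypothetical model outputs
print (raw + 5) / 2                                 # -> 0.0, 2.5, 4.0, 5.0 stars
```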

@@ -163,11 +163,11 @@ def main():
update_equation=optimizer)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
trn_reader = paddle.reader.batched(
trn_reader = paddle.batch(
paddle.reader.shuffle(
conll05.test(), buf_size=8192), batch_size=10)
reader_dict = {
feeding = {
'word_data': 0,
'ctx_n2_data': 1,
'ctx_n1_data': 2,
@@ -183,7 +183,7 @@ def main():
reader=trn_reader,
event_handler=event_handler,
num_passes=10000,
reader_dict=reader_dict)
feeding=feeding)
if __name__ == '__main__':

@@ -18,11 +18,7 @@ from paddle.trainer_config_helpers.poolings import MaxPooling
import paddle.v2 as paddle
def convolution_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=128,
is_predict=False):
def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
@@ -42,8 +38,7 @@ def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3,
is_predict=False):
stacked_num=3):
"""
A wrapper for the sentiment classification task.
This network uses a bi-directional recurrent network,
@@ -110,7 +105,7 @@ def stacked_lstm_net(input_dim,
if __name__ == '__main__':
# init
paddle.init(use_gpu=True, trainer_count=4)
paddle.init(use_gpu=False, trainer_count=4)
# network config
print 'load dictionary...'
@@ -143,11 +138,11 @@ if __name__ == '__main__':
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.reader.batched(
reader=paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict),
batch_size=128),
reader_dict={'word': 0,
'label': 1})
feeding={'word': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# create trainer
@@ -156,11 +151,11 @@ if __name__ == '__main__':
update_equation=adam_optimizer)
trainer.train(
reader=paddle.reader.batched(
reader=paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100),
event_handler=event_handler,
reader_dict={'word': 0,
'label': 1},
feeding={'word': 0,
'label': 1},
num_passes=10)
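With `is_predict` removed, both network builders expose one training-time signature, so switching architectures stays a one-line change. A hedged sketch of the usage; it assumes each builder returns its cost layer, as the book's sentiment example does:

```python
# Hypothetical usage of the simplified builders; `word_dict` as loaded above.
dict_dim = len(word_dict)
cost = convolution_net(dict_dim, class_dim=2)
# cost = stacked_lstm_net(dict_dim, class_dim=2, stacked_num=3)
```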

@@ -1,105 +1,139 @@
import os
import paddle.v2 as paddle
from seqToseq_net_v2 import seqToseq_net_v2
# Data Definition.
# TODO: This code should be merged into the dataset package.
data_dir = "./data/pre-wmt14"
src_lang_dict = os.path.join(data_dir, 'src.dict')
trg_lang_dict = os.path.join(data_dir, 'trg.dict')
source_dict_dim = len(open(src_lang_dict, "r").readlines())
target_dict_dim = len(open(trg_lang_dict, "r").readlines())
def read_to_dict(dict_path):
with open(dict_path, "r") as fin:
out_dict = {
line.strip(): line_count
for line_count, line in enumerate(fin)
}
return out_dict
src_dict = read_to_dict(src_lang_dict)
trg_dict = read_to_dict(trg_lang_dict)
train_list = os.path.join(data_dir, 'train.list')
test_list = os.path.join(data_dir, 'test.list')
UNK_IDX = 2
START = "<s>"
END = "<e>"
def _get_ids(s, dictionary):
words = s.strip().split()
return [dictionary[START]] + \
[dictionary.get(w, UNK_IDX) for w in words] + \
[dictionary[END]]
def train_reader(file_name):
def reader():
with open(file_name, 'r') as f:
for line_count, line in enumerate(f):
line_split = line.strip().split('\t')
if len(line_split) != 2:
continue
src_seq = line_split[0] # one source sequence
src_ids = _get_ids(src_seq, src_dict)
trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split()
trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words]
# remove sequence whose length > 80 in training mode
if len(src_ids) > 80 or len(trg_ids) > 80:
continue
trg_ids_next = trg_ids + [trg_dict[END]]
trg_ids = [trg_dict[START]] + trg_ids
yield src_ids, trg_ids, trg_ids_next
return reader
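Whether hand-rolled like `train_reader` above or canned like its replacement `paddle.dataset.wmt14.train`, a reader creator obeys the same contract: a no-argument callable returning a generator of samples, here `(src_ids, trg_ids, trg_ids_next)` tuples. An illustrative check against the new dataset reader:

```python
# Illustrative: both readers yield (src_ids, trg_ids, trg_ids_next) tuples.
wmt14_train = paddle.dataset.wmt14.train(dict_size=30000)
src_ids, trg_ids, trg_ids_next = next(wmt14_train())
```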
def seqToseq_net(source_dict_dim, target_dict_dim):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
type=paddle.data_type.integer_value_sequence(source_dict_dim))
src_embedding = paddle.layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
src_forward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size)
src_backward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
#### Decoder
with paddle.layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += paddle.layer.full_matrix_projection(
input=encoded_vector)
backward_first = paddle.layer.first_seq(input=src_backward)
with paddle.layer.mixed(
size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
decoder_boot += paddle.layer.full_matrix_projection(
input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = paddle.layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = paddle.networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
gru_step = paddle.layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
with paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For a decoder equipped with an attention mechanism, in training,
# the target embedding (the ground truth) is the data input,
# while the encoded source sequence is accessed as an unbounded memory.
# Here, StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
def main():
paddle.init(use_gpu=False, trainer_count=1)
# source and target dict dim.
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
# define network topology
cost = seqToseq_net_v2(source_dict_dim, target_dict_dim)
cost = seqToseq_net(source_dict_dim, target_dict_dim)
parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
# define the optimization method and trainer
optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
reader_dict = {
# define data reader
feeding = {
'source_language_word': 0,
'target_language_word': 1,
'target_language_next_word': 2
}
trn_reader = paddle.reader.batched(
wmt14_reader = paddle.batch(
paddle.reader.shuffle(
train_reader("data/pre-wmt14/train/train"), buf_size=8192),
paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192),
batch_size=5)
# define event_handler callback
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
# start to train
trainer.train(
reader=trn_reader,
reader=wmt14_reader,
event_handler=event_handler,
num_passes=10000,
reader_dict=reader_dict)
feeding=feeding)
if __name__ == '__main__':

@@ -1,90 +0,0 @@
import paddle.v2.activation as activation
import paddle.v2.attr as attr
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.networks as networks
def seqToseq_net_v2(source_dict_dim, target_dict_dim):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
#### Encoder
src_word_id = layer.data(
name='source_language_word',
type=data_type.integer_value_sequence(source_dict_dim))
src_embedding = layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=attr.ParamAttr(name='_source_language_embedding'))
src_forward = networks.simple_gru(input=src_embedding, size=encoder_size)
src_backward = networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = layer.concat(input=[src_forward, src_backward])
#### Decoder
with layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += layer.full_matrix_projection(input=encoded_vector)
backward_first = layer.first_seq(input=src_backward)
with layer.mixed(size=decoder_size, act=activation.Tanh()) as decoder_boot:
decoder_boot += layer.full_matrix_projection(input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += layer.full_matrix_projection(input=context)
decoder_inputs += layer.full_matrix_projection(input=current_word)
gru_step = layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
with layer.mixed(
size=target_dict_dim, bias_attr=True,
act=activation.Softmax()) as out:
out += layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
trg_embedding = layer.embedding(
input=layer.data(
name='target_language_word',
type=data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For a decoder equipped with an attention mechanism, in training,
# the target embedding (the ground truth) is the data input,
# while the encoded source sequence is accessed as an unbounded memory.
# Here, StaticInput defines a read-only memory
# for the recurrent_group.
decoder = layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = layer.data(
name='target_language_next_word',
type=data_type.integer_value_sequence(target_dict_dim))
cost = layer.classification_cost(input=decoder, label=lbl)
return cost

@@ -0,0 +1,80 @@
import math

import paddle.v2 as paddle

dictsize = 1953
embsize = 32
hiddensize = 256
N = 5


def wordemb(inlayer):
    wordemb = paddle.layer.table_projection(
        input=inlayer,
        size=embsize,
        param_attr=paddle.attr.Param(
            name="_proj",
            initial_std=0.001,
            learning_rate=1,
            l2_rate=0, ))
    return wordemb


def main():
    paddle.init(use_gpu=False, trainer_count=1)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                result = trainer.test(
                    paddle.batch(
                        paddle.dataset.imikolov.test(word_dict, N), 32))
                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics)

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
    trainer.train(
        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
        num_passes=30,
        event_handler=event_handler)


if __name__ == '__main__':
    main()
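After training, the learned word vectors sit in the `_proj` table that `wordemb` shares across the four context words. A hedged sketch of reading them back; it assumes `Parameters.get` returns the raw parameter values as a numpy array and that the query word is in the vocabulary:

```python
# Hypothetical post-training lookup of a word vector from the shared table.
embeddings = parameters.get("_proj").reshape(len(word_dict), embsize)
word_vec = embeddings[word_dict['the']]   # a 32-dim vector, assuming 'the' is in the dict
```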

@@ -1,2 +1,26 @@
API
===
===
模型配置 API
------------
.. toctree::
:maxdepth: 1
v2/model_configs.rst
数据 API
--------
.. toctree::
:maxdepth: 1
v2/data.rst
训练 API
--------
.. toctree::
:maxdepth: 1
v2/run_logic.rst

@@ -7,4 +7,20 @@ Model Config API
.. toctree::
:maxdepth: 1
v2/model_configs.rst
v2/model_configs.rst
Data API
--------
.. toctree::
:maxdepth: 1
v2/data.rst
Train API
---------
.. toctree::
:maxdepth: 1
v2/run_logic.rst

@@ -0,0 +1,93 @@
================
Data Related API
================
#########
DataTypes
#########
.. automodule:: paddle.v2.data_type
:members:
##########
DataFeeder
##########
.. automodule:: paddle.v2.data_feeder
:members:
######
Reader
######
.. automodule:: paddle.v2.reader
:members:
.. automodule:: paddle.v2.reader.creator
:members:
#########
minibatch
#########
.. automodule:: paddle.v2.minibatch
:members:
#######
Dataset
#######
.. automodule:: paddle.v2.dataset
:members:
mnist
+++++
.. automodule:: paddle.v2.dataset.mnist
:members:
cifar
+++++
.. automodule:: paddle.v2.dataset.cifar
:members:
conll05
+++++++
.. automodule:: paddle.v2.dataset.conll05
:members:
imdb
++++
.. automodule:: paddle.v2.dataset.imdb
:members:
imikolov
++++++++
.. automodule:: paddle.v2.dataset.imikolov
:members:
movielens
+++++++++
.. automodule:: paddle.v2.dataset.movielens
:members:
sentiment
+++++++++
.. automodule:: paddle.v2.dataset.sentiment
:members:
uci_housing
+++++++++++
.. automodule:: paddle.v2.dataset.uci_housing
:members:

@@ -1,6 +1,46 @@
#########################
Configuration Related API
#########################
======
Layers
======
.. automodule:: paddle.v2.layer
:members:
==========
Attributes
==========
.. automodule:: paddle.v2.attr
:members:
===========
Activations
===========
.. automodule:: paddle.v2.activation
:members:
========
Poolings
========
.. automodule:: paddle.v2.pooling
:members:
========
Networks
========
.. automodule:: paddle.v2.networks
:members:
==========
Optimizers
==========
.. automodule:: paddle.v2.optimizer
:members:

@@ -0,0 +1,34 @@
###########
Trainer API
###########
==========
Parameters
==========
.. automodule:: paddle.v2.parameters
:members:
=======
Trainer
=======
.. automodule:: paddle.v2.trainer
:members:
=====
Event
=====
.. automodule:: paddle.v2.event
:members:
=========
Inference
=========
.. autofunction:: paddle.v2.infer

@@ -23,19 +23,19 @@ An example implementation for single item data reader creator:
```python
def reader_creator_random_image(width, height):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
```
An example implementation for a multiple-item data reader creator:
```python
def reader_creator_random_imageand_label(widht, height, label):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height), label
return reader
def reader_creator_random_image_and_label(width, height, label):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height), label
return reader
```
## Batch Reader Interface
@@ -74,11 +74,11 @@ mnist_train_batch_reader = paddle.batch(mnist_train, 128)
It is also easy to create a custom batch reader:
```python
def custom_batch_reader():
while True:
batch = []
for i in xrange(128):
batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended.
yield batch
while True:
batch = []
for i in xrange(128):
batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended.
yield batch
mnist_random_image_batch_reader = custom_batch_reader
```
@@ -123,16 +123,16 @@ We can do:
```python
def reader_creator_random_image(width, height):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
def reader_creator_bool(t):
def reader():
while True:
yield t
return reader
def reader():
while True:
yield t
return reader
true_reader = reader_creator_bool(True)
false_reader = reader_creator_bool(False)
@@ -172,18 +172,18 @@ We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["imag
```python
def image_reader_creator(image_path, label_path, n):
def reader():
f = open(image_path)
l = open(label_path)
images = numpy.fromfile(
f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0
labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
for i in xrange(n):
yield images[i, :], labels[i] # a single entry of data is created each time
f.close()
l.close()
return reader
def reader():
f = open(image_path)
l = open(label_path)
images = numpy.fromfile(
f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0
labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
for i in xrange(n):
yield images[i, :], labels[i] # a single entry of data is created each time
f.close()
l.close()
return reader
# image_reader_creator creates a reader
reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024)
@@ -196,7 +196,7 @@ An example implementation of paddle.train could be:
```python
def train(batch_reader, mapping, batch_size, total_pass):
for pass_idx in range(total_pass):
for mini_batch in batch_reader(): # this loop will never end in online learning.
do_forward_backward(mini_batch, mapping)
for pass_idx in range(total_pass):
for mini_batch in batch_reader(): # this loop will never end in online learning.
do_forward_backward(mini_batch, mapping)
```
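Putting the pieces of this design together, the decorators compose: wrap an item reader with `shuffle`, then hand the result to `batch` to obtain the batch reader that `paddle.train` consumes. Illustrative, reusing the `mnist_train` reader named earlier in this document:

```python
shuffled_reader = paddle.reader.shuffle(mnist_train, buf_size=512)
mnist_train_batch_reader = paddle.batch(shuffled_reader, batch_size=128)
```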

@@ -346,7 +346,9 @@ Evaluator* MultiGradientMachine::makeEvaluator() const {
void MultiGradientMachine::eval(Evaluator* evaluator) const {
for (auto& thread : threads_) {
SetDevice device(thread->getDeviceId());
thread->getGradientMachine()->eval(evaluator);
if (thread->hasInputData()) {
thread->getGradientMachine()->eval(evaluator);
}
}
}
@@ -356,14 +358,19 @@ void MultiGradientMachine::getOutArgs(std::vector<Argument>* outArgs,
REGISTER_TIMER("waitOutArgs");
thread->waitOutArgsReady();
}
outArgs_.resize(threads_[0]->getOutArgs().size());
outArgs_.resize(threads_[threads_.size() - 1]->getOutArgs().size());
REGISTER_TIMER("copyOutArgs");
for (size_t i = 0; i < outArgs_.size(); ++i) {
std::vector<Argument> args;
args.reserve(threads_.size());
for (auto& thread : threads_) {
args.push_back(thread->getOutArgs()[i]);
// If the thread input is empty, then the output is empty.
auto tmp = thread->getOutArgs();
if (tmp.size() > 0) {
args.push_back(tmp[i]);
}
}
outArgs_[i].concat(args, useGpu_, outArgStream_, passType);
}
@@ -534,7 +541,7 @@ void TrainerThread::prefetch() {
void TrainerThread::forward() {
if (!inArgsCopied_) {
REGISTER_TIMER("copyInArgs");
copyInArgs();
batchSize_ = copyInArgs();
} else {
inArgsCopied_ = false;
}
@@ -564,7 +571,12 @@ void TrainerThread::forward() {
{
REGISTER_TIMER("thread_forward");
gradientMachine_->forward(inArgs_, &outArgs_, multiMachine_->getPassType());
if (batchSize_ > 0) {
gradientMachine_->forward(
inArgs_, &outArgs_, multiMachine_->getPassType());
} else {
outArgs_.clear();
}
}
outArgsReadySem_.post();
}
@@ -574,7 +586,13 @@ void TrainerThread::backward() {
if (multiMachine_->isPassGrad()) {
copyOutputGrad();
}
gradientMachine_->backward(backwardCallback_);
if (batchSize_ > 0) {
gradientMachine_->backward(backwardCallback_);
} else {
for (size_t i = parameters_.size(); i > 0; i--) {
backwardCallback(parameters_[i - 1].get());
}
}
if (multiMachine_->hasNonstaticCpuParamters()) {
mergeCpuGradients();
}
@@ -732,7 +750,7 @@ void TrainerThread::notifyValueReady(int paramId) {
notifyValueDispatch(paramId);
}
void TrainerThread::copyInArgs() {
int TrainerThread::copyInArgs() {
const std::vector<Argument>& fullInArgs = multiMachine_->getInArgs();
int numThreads = multiMachine_->getAllThreads().size();
int32_t numSequences = fullInArgs[0].getNumSequences();
@@ -748,7 +766,7 @@ void TrainerThread::copyInArgs() {
}
if (copySize == 0) {
return;
return 0;
}
for (size_t i = 0; i < fullInArgs.size(); i++) {
@@ -758,6 +776,7 @@ void TrainerThread::copyInArgs() {
copySize,
FLAGS_parallel_nn ? false : multiMachine_->useGpu());
}
return copySize;
}
void TrainerThread::mergeCpuGradients() {

@@ -387,6 +387,9 @@ public:
/// copy the output gradient from the main GradientMachine.
void copyOutputGrad();
/// Whether the thread has input data.
bool hasInputData() { return batchSize_ != 0; }
protected:
void mergeCpuGradients();
@@ -407,7 +410,7 @@ protected:
void copyGradToBufferThread();
void gradCollectThread();
void copyInArgs();
int copyInArgs();
void forward();
void backward();
void backwardCallback(Parameter* para);
@@ -467,6 +470,7 @@ protected:
/// indicate whether inArgs is copied before forward()
bool inArgsCopied_;
int batchSize_;
};
} // namespace paddle

@@ -45,6 +45,23 @@ class CacheType(object):
class InputType(object):
"""
InputType is the base class for paddle input types.
.. note::
This is a base class and should never be used directly by users.
:param dim: dimension of input. If the input is an integer, it means the
value range. Otherwise, it means the size of the layer.
:type dim: int
:param seq_type: sequence type of input. 0 means it is not a sequence. 1
means it is a variable length sequence. 2 means it is a
nested sequence.
:type seq_type: int
:param type: data type of input.
:type type: int
"""
__slots__ = ['dim', 'seq_type', 'type']
def __init__(self, dim, seq_type, tp):
@@ -54,20 +71,61 @@ class InputType(object):
def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Dense vector. It means the input feature is a dense float vector. For example,
if the input is an image with 28*28 pixels, the input of the Paddle neural
network should be a dense vector with dimension 784.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.Dense)
def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Sparse binary vector. It means the input feature is a sparse vector and
every element in this vector is either zero or one.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of this input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.SparseNonValue)
def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Sparse vector. It means the input feature is a sparse vector. Most of the
elements in this vector are zero; the others can be any float value.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of this input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.SparseValue)
def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE):
"""Data type of integer.
"""
Data type of an integer.
:param seq_type: sequence type of this input.
:type seq_type: int
:param value_range: range of this integer.
:type value_range: int
:return: An input type object
:rtype: InputType
"""
return InputType(value_range, seq_type, DataType.Index)
@@ -76,10 +134,17 @@ dense_vector = dense_slot
sparse_binary_vector = sparse_non_value_slot
sparse_vector = sparse_value_slot
integer_value = index_slot
integer_value.__doc__ = index_slot.__doc__
def dense_vector_sequence(dim):
"""
Data type of a sequence of dense vectors.
:param dim: dimension of dense vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return dense_vector(dim, seq_type=SequenceType.SEQUENCE)
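These type constructors are consumed by `paddle.layer.data`, as the scripts elsewhere in this diff show; for instance:

```python
import paddle.v2 as paddle

# Declaring data layers with the input types documented above
# (mirrors usage from other files in this commit; 784 = 28*28 pixels).
image = paddle.layer.data(
    name="image", type=paddle.data_type.dense_vector(784))
word = paddle.layer.data(
    name="word", type=paddle.data_type.integer_value_sequence(10000))  # vocab size
```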
@@ -88,6 +153,15 @@ def dense_vector_sub_sequence(dim):
def sparse_binary_vector_sequence(dim):
"""
Data type of a sequence of sparse vectors, in which every element is either
zero or one.
:param dim: dimension of sparse vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE)
@@ -96,6 +170,15 @@ def sparse_binary_vector_sub_sequence(dim):
def sparse_vector_sequence(dim):
"""
Data type of a sequence of sparse vectors, in which most elements are zero and
the others can be any float value.
:param dim: dimension of sparse vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return sparse_vector(dim, seq_type=SequenceType.SEQUENCE)
@@ -104,8 +187,11 @@ def sparse_vector_sub_sequence(dim):
def integer_value_sequence(value_range):
"""Data type of a sequence of integer.
"""
Data type of a sequence of integers.
:param value_range: range of each element.
:type value_range: int
"""
return integer_value(value_range, seq_type=SequenceType.SEQUENCE)
@@ -115,7 +201,6 @@ def integer_value_sub_sequence(dim):
integer_sequence = integer_value_sequence
integer_sequence.__doc__ = integer_value_sequence.__doc__
class SingleSlotWrapper(object):

@@ -39,6 +39,7 @@ register_unary_math_op('abs', act.AbsActivation())
register_unary_math_op('sigmoid', act.SigmoidActivation())
register_unary_math_op('tanh', act.TanhActivation())
register_unary_math_op('square', act.SquareActivation())
register_unary_math_op('relu', act.ReluActivation())
def add(layeroutput, other):
