parent
ec33832602
commit
4d4322a698
@@ -1,5 +0,0 @@
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
foreach(src ${TEST_OPS})
  py_test(${src} SRCS ${src}.py)
endforeach()
@@ -1,76 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
import os

x = fluid.layers.data(name='x', shape=[13], dtype='float32')

y_predict = fluid.layers.fc(input=x, size=1, act=None)

y = fluid.layers.data(name='y', shape=[1], dtype='float32')

cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)

sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)

BATCH_SIZE = 20

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.uci_housing.train(), buf_size=500),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# list of all parameter server endpoints, used for splitting parameters
pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for the current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv("TRAINING_ROLE",
                          "TRAINER")  # get the training role: trainer/pserver

t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
else:
    trainer_prog = t.get_trainer_program()

    exe.run(fluid.default_startup_program())

    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            avg_loss_value = exe.run(trainer_prog,
                                     feed=feeder.feed(data),
                                     fetch_list=[avg_cost])
            print("loss:" + str(avg_loss_value))
            if avg_loss_value[0] < 10.0:
                exit(0)
    exit(1)
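All ten scripts removed by this commit share the env-driven dispatch shown above: the same program is transpiled once, and each process picks its role from TRAINING_ROLE. For readers reconstructing the setup, a minimal local launcher might look like the sketch below; the endpoint value, the single-pserver/single-trainer topology, and the script filename are illustrative assumptions, not part of the deleted tests.

import os
import subprocess

# Hypothetical local launcher: one parameter server plus one trainer.
# The endpoint and the script name are made-up placeholders.
env_common = dict(os.environ, PSERVERS="127.0.0.1:6174")

pserver = subprocess.Popen(
    ["python", "notest_dist_fit_a_line.py"],  # placeholder script name
    env=dict(env_common,
             TRAINING_ROLE="PSERVER",
             SERVER_ENDPOINT="127.0.0.1:6174"))
trainer = subprocess.Popen(
    ["python", "notest_dist_fit_a_line.py"],
    env=dict(env_common, TRAINING_ROLE="TRAINER"))

trainer.wait()   # the trainer exits 0 once its loss target is reached
pserver.kill()   # the pserver loop does not terminate on its own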
@@ -1,171 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import paddle.v2 as paddle
import paddle.fluid as fluid
import os
import sys

TRAINERS = 5
BATCH_SIZE = 128
PASS_NUM = 100


def resnet_cifar10(input, depth=32):
    def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
        tmp = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=False)
        return fluid.layers.batch_norm(input=tmp, act=act)

    def shortcut(input, ch_in, ch_out, stride):
        if ch_in != ch_out:
            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
        else:
            return input

    def basicblock(input, ch_in, ch_out, stride):
        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None)
        short = shortcut(input, ch_in, ch_out, stride)
        return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')

    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
        tmp = block_func(input, ch_in, ch_out, stride)
        for i in range(1, count):
            tmp = block_func(tmp, ch_out, ch_out, 1)
        return tmp

    assert (depth - 2) % 6 == 0
    n = (depth - 2) // 6  # integer division so the block count stays an int
    conv1 = conv_bn_layer(
        input=input, ch_out=16, filter_size=3, stride=1, padding=1)
    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
    pool = fluid.layers.pool2d(
        input=res3, pool_size=8, pool_type='avg', pool_stride=1)
    return pool


def vgg16_bn_drop(input):
    def conv_block(input, num_filter, groups, dropouts):
        return fluid.nets.img_conv_group(
            input=input,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max')

    conv1 = conv_block(input, 64, 2, [0.3, 0])
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

    drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
    fc1 = fluid.layers.fc(input=drop, size=512, act=None)
    bn = fluid.layers.batch_norm(input=fc1, act='relu')
    drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
    fc2 = fluid.layers.fc(input=drop2, size=512, act=None)
    return fc2


classdim = 10
data_shape = [3, 32, 32]

images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')

net_type = "vgg"
if len(sys.argv) >= 2:
    net_type = sys.argv[1]

if net_type == "vgg":
    print("training vgg net")
    net = vgg16_bn_drop(images)
elif net_type == "resnet":
    print("training resnet")
    net = resnet_cifar10(images, 32)
else:
    raise ValueError("%s network is not supported" % net_type)

predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)

optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimize_ops, params_grads = optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.cifar.train10(), buf_size=128 * 10),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# list of all parameter server endpoints, used for splitting parameters
pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for the current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv("TRAINING_ROLE",
                          "TRAINER")  # get the training role: trainer/pserver

t.transpile(
    optimize_ops, params_grads, pservers=pserver_endpoints, trainers=TRAINERS)

if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    trainer_prog = t.get_trainer_program()
    exe.run(fluid.default_startup_program())

    for pass_id in range(PASS_NUM):
        accuracy.reset(exe)
        for data in train_reader():
            loss, acc = exe.run(trainer_prog,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost] + accuracy.metrics)
            pass_acc = accuracy.eval(exe)
            print("pass_id:" + str(pass_id) + " loss:" + str(loss) +
                  " pass_acc:" + str(pass_acc))
            # this model is slow, so if we can train two mini-batches,
            # we think it works properly.
    print("trainer run end")
else:
    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
    exit(1)
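One detail worth spelling out in resnet_cifar10 above: the `assert (depth - 2) % 6 == 0` encodes the usual CIFAR ResNet sizing, where three stages of n basic blocks (two conv layers each) sit between the stem convolution and the final classifier, giving 6n + 2 weighted layers overall. A small self-contained check of that arithmetic (a hypothetical helper, not part of the deleted file):

def resnet_blocks_per_stage(depth=32):
    # depth = 6n + 2: three stages of n blocks, two conv layers per block,
    # plus the stem conv and the final FC layer.
    assert (depth - 2) % 6 == 0, "depth must be of the form 6n + 2"
    return (depth - 2) // 6

print(resnet_blocks_per_stage(32))  # 5 basic blocks per stage
print(resnet_blocks_per_stage(56))  # 9, as in the classic CIFAR ResNet-56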
@@ -1,241 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.fluid as fluid
import time
import os

word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
mix_hidden_lr = 1e-3

IS_SPARSE = True
PASS_NUM = 10
BATCH_SIZE = 20

embedding_name = 'emb'


def load_parameter(file_name, h, w):
    with open(file_name, 'rb') as f:
        f.read(16)  # skip header.
        return np.fromfile(f, dtype=np.float32).reshape(h, w)


def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
            **ignored):
    # 8 features
    predicate_embedding = fluid.layers.embedding(
        input=predicate,
        size=[pred_len, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr='vemb')

    mark_embedding = fluid.layers.embedding(
        input=mark,
        size=[mark_dict_len, mark_dim],
        dtype='float32',
        is_sparse=IS_SPARSE)

    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        fluid.layers.embedding(
            size=[word_dict_len, word_dim],
            input=x,
            param_attr=fluid.ParamAttr(
                name=embedding_name, trainable=False)) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0_layers = [
        fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
    ]

    hidden_0 = fluid.layers.sums(input=hidden_0_layers)

    lstm_0 = fluid.layers.dynamic_lstm(
        input=hidden_0,
        size=hidden_dim,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = fluid.layers.sums(input=[
            fluid.layers.fc(input=input_tmp[0], size=hidden_dim),
            fluid.layers.fc(input=input_tmp[1], size=hidden_dim)
        ])

        lstm = fluid.layers.dynamic_lstm(
            input=mix_hidden,
            size=hidden_dim,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid',
            is_reverse=((i % 2) == 1))

        input_tmp = [mix_hidden, lstm]

    feature_out = fluid.layers.sums(input=[
        fluid.layers.fc(input=input_tmp[0], size=label_dict_len),
        fluid.layers.fc(input=input_tmp[1], size=label_dict_len)
    ])

    return feature_out


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    # define network topology
    word = fluid.layers.data(
        name='word_data', shape=[1], dtype='int64', lod_level=1)
    predicate = fluid.layers.data(
        name='verb_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n2 = fluid.layers.data(
        name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n1 = fluid.layers.data(
        name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_0 = fluid.layers.data(
        name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p1 = fluid.layers.data(
        name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p2 = fluid.layers.data(
        name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
    mark = fluid.layers.data(
        name='mark_data', shape=[1], dtype='int64', lod_level=1)
    feature_out = db_lstm(**locals())
    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)
    crf_cost = fluid.layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=mix_hidden_lr))
    avg_cost = fluid.layers.mean(crf_cost)

    # TODO(qiao)
    # check other optimizers and check why out will be NAN
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)

    # TODO(qiao)
    # add dependency track and move this config before optimizer
    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    chunk_evaluator = fluid.evaluator.ChunkEvaluator(
        input=crf_decode,
        label=target,
        chunk_scheme="IOB",
        num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    feeder = fluid.DataFeeder(
        feed_list=[
            word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target
        ],
        place=place)
    exe = fluid.Executor(place)

    t = fluid.DistributeTranspiler()
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for the current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver

    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        trainer_prog = t.get_trainer_program()
        start_time = time.time()
        batch_id = 0
        exe.run(fluid.default_startup_program())
        embedding_param = fluid.global_scope().find_var(
            embedding_name).get_tensor()
        embedding_param.set(
            load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
            place)
        for pass_id in xrange(PASS_NUM):
            chunk_evaluator.reset(exe)
            for data in train_data():
                cost, precision, recall, f1_score = exe.run(
                    trainer_prog,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost] + chunk_evaluator.metrics)
                pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(
                    exe)

                if batch_id % 10 == 0:
                    print("avg_cost:" + str(cost) + " precision:" +
                          str(precision) + " recall:" + str(recall) +
                          " f1_score:" + str(f1_score) + " pass_precision:" +
                          str(pass_precision) + " pass_recall:" +
                          str(pass_recall) + " pass_f1_score:" +
                          str(pass_f1_score))
                    if batch_id != 0:
                        print("seconds per batch: " + str(
                            (time.time() - start_time) / batch_id))

                batch_id = batch_id + 1


if __name__ == '__main__':
    main()
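The to_lodtensor helper above, which reappears in several of the scripts deleted by this commit, flattens a batch of variable-length sequences into one [total_len, 1] tensor plus a cumulative-offset list (the LoD). The offset arithmetic in isolation, on made-up example data:

import numpy as np

# Three sequences of lengths 3, 1, 2 -> cumulative offsets [0, 3, 4, 6].
data = [[1, 2, 3], [4], [5, 6]]
lod = [0]
for seq in data:
    lod.append(lod[-1] + len(seq))
flattened = np.concatenate(data).astype("int64").reshape([-1, 1])
print(lod)              # [0, 3, 4, 6]
print(flattened.shape)  # (6, 1)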
@@ -1,113 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
import os

PASS_NUM = 100
EMBED_SIZE = 32
HIDDEN_SIZE = 256
N = 5
BATCH_SIZE = 32
IS_SPARSE = True
TRAINERS = 2

word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)

first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')

embed_first = fluid.layers.embedding(
    input=first_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')
embed_second = fluid.layers.embedding(
    input=second_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')
embed_third = fluid.layers.embedding(
    input=third_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')
embed_forth = fluid.layers.embedding(
    input=forth_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')

concat_embed = fluid.layers.concat(
    input=[embed_first, embed_second, embed_third, embed_forth], axis=1)
hidden1 = fluid.layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid')
predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax')
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch(
    paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# list of all parameter server endpoints, used for splitting parameters
pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for the current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv("TRAINING_ROLE",
                          "TRAINER")  # get the training role: trainer/pserver

t.transpile(
    optimize_ops, params_grads, pservers=pserver_endpoints, trainers=TRAINERS)
if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    feeder = fluid.DataFeeder(
        feed_list=[first_word, second_word, third_word, forth_word, next_word],
        place=place)
    exe.run(fluid.default_startup_program())
    trainer_prog = t.get_trainer_program()
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            avg_cost_np = exe.run(trainer_prog,
                                  feed=feeder.feed(data),
                                  fetch_list=[avg_cost])
            print("avg_cost_np", avg_cost_np)
            if avg_cost_np[0] < 5.0:
                # if avg cost is less than 5.0, we consider the code correct
                exit(0)
else:
    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
    exit(1)
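For the word2vec script above: with N = 5, each sample drawn from paddle.dataset.imikolov.train is a 5-tuple of word ids, four context words followed by the target word, which is why feed_list names exactly five data layers. A sketch of the batch layout the feeder consumes (the ids are invented, and the 5-tuple shape is an assumption about that era's imikolov reader):

# One mini-batch of two hypothetical samples in the layout the DataFeeder
# above consumes: (firstw, secondw, thirdw, forthw, nextw) per sample.
batch = [
    (12, 7, 301, 45, 9),    # four context word ids, then the target id
    (88, 3, 19, 502, 11),
]
# feeder.feed(batch) maps column i of each tuple onto the i-th layer in
# feed_list, producing int64 tensors of shape [batch_size, 1].
for sample in batch:
    assert len(sample) == 5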
@@ -1,158 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
import paddle.fluid.layers as layers
from paddle.fluid.executor import Executor
import os

dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
hidden_dim = 32
word_dim = 16
IS_SPARSE = True
batch_size = 10
max_length = 50
topk_size = 50
trg_dic_size = 10000

decoder_size = hidden_dim


def encoder_decoder():
    # encoder
    src_word_id = layers.data(
        name="src_word_id", shape=[1], dtype='int64', lod_level=1)
    src_embedding = layers.embedding(
        input=src_word_id,
        size=[dict_size, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr=fluid.ParamAttr(name='vemb'))

    fc1 = fluid.layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
    lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4)
    encoder_out = layers.sequence_last_step(input=lstm_hidden0)

    # decoder
    trg_language_word = layers.data(
        name="target_language_word", shape=[1], dtype='int64', lod_level=1)
    trg_embedding = layers.embedding(
        input=trg_language_word,
        size=[dict_size, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr=fluid.ParamAttr(name='vemb'))

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        current_word = rnn.step_input(trg_embedding)
        mem = rnn.memory(init=encoder_out)
        fc1 = fluid.layers.fc(input=[current_word, mem],
                              size=decoder_size,
                              act='tanh')
        out = fluid.layers.fc(input=fc1, size=target_dict_dim, act='softmax')
        rnn.update_memory(mem, fc1)
        rnn.output(out)

    return rnn()


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = core.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    rnn_out = encoder_decoder()
    label = layers.data(
        name="target_language_next_word", shape=[1], dtype='int64',
        lod_level=1)
    cost = layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(cost)

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimize_ops, params_grads = optimizer.minimize(avg_cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
        batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)

    t = fluid.DistributeTranspiler()
    # list of all parameter server endpoints, used for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for the current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver

    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        trainer_prog = t.get_trainer_program()
        exe.run(framework.default_startup_program())

        batch_id = 0
        for pass_id in xrange(2):
            for data in train_data():
                word_data = to_lodtensor(map(lambda x: x[0], data), place)
                trg_word = to_lodtensor(map(lambda x: x[1], data), place)
                trg_word_next = to_lodtensor(map(lambda x: x[2], data), place)
                outs = exe.run(trainer_prog,
                               feed={
                                   'src_word_id': word_data,
                                   'target_language_word': trg_word,
                                   'target_language_next_word': trg_word_next
                               },
                               fetch_list=[avg_cost])
                avg_cost_val = np.array(outs[0])
                print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                      " avg_cost=" + str(avg_cost_val))
                if batch_id > 3:
                    exit(0)
                batch_id += 1
    else:
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")


if __name__ == '__main__':
    main()
@@ -1,96 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
import os

images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
conv_pool_1 = fluid.nets.simple_img_conv_pool(
    input=images,
    filter_size=5,
    num_filters=20,
    pool_size=2,
    pool_stride=2,
    act="relu")
conv_pool_2 = fluid.nets.simple_img_conv_pool(
    input=conv_pool_1,
    filter_size=5,
    num_filters=50,
    pool_size=2,
    pool_stride=2,
    act="relu")

predict = fluid.layers.fc(input=conv_pool_2, size=10, act="softmax")
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
optimizer = fluid.optimizer.Adam(learning_rate=0.01)
optimize_ops, params_grads = optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

BATCH_SIZE = 50
PASS_NUM = 3
train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=500),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)

pserver_endpoints = os.getenv("PSERVERS")  # all pserver endpoints
trainers = int(os.getenv("TRAINERS"))  # total trainer count
current_endpoint = os.getenv("SERVER_ENDPOINT")  # current pserver endpoint
training_role = os.getenv("TRAINING_ROLE",
                          "TRAINER")  # get the training role: trainer/pserver
if not current_endpoint:
    print("need env SERVER_ENDPOINT")
    exit(1)

t = fluid.DistributeTranspiler()
t.transpile(
    optimize_ops, params_grads, pservers=pserver_endpoints, trainers=trainers)

if training_role == "PSERVER":
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    trainer_prog = t.get_trainer_program()
    feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
    # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver
    exe.run(fluid.default_startup_program())

    for pass_id in range(PASS_NUM):
        accuracy.reset(exe)
        batch_id = 0
        for data in train_reader():
            loss, acc = exe.run(trainer_prog,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost] + accuracy.metrics)
            pass_acc = accuracy.eval(exe)
            if batch_id % 100 == 0:
                print("batch_id %d, loss: %f, acc: %f" %
                      (batch_id, loss, pass_acc))
            batch_id += 1

        pass_acc = accuracy.eval(exe)
        print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
else:
    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
@@ -1,89 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
import os

BATCH_SIZE = 128
PASS_NUM = 100

images = fluid.layers.data(name='x', shape=[784], dtype='float32')

# TODO(aroraabhinav) Add regularization and error clipping after
# Issue 7432 (https://github.com/PaddlePaddle/Paddle/issues/7432) is resolved.
hidden1 = fluid.layers.fc(input=images, size=128, act='relu')
hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')

label = fluid.layers.data(name='y', shape=[1], dtype='int64')

cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)

optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
optimize_ops, params_grads = optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# list of all parameter server endpoints, used for splitting parameters
pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for the current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv("TRAINING_ROLE",
                          "TRAINER")  # get the training role: trainer/pserver
t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    trainer_prog = t.get_trainer_program()
    feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
    exe.run(fluid.default_startup_program())

    for pass_id in range(PASS_NUM):
        accuracy.reset(exe)
        batch_id = 0
        for data in train_reader():
            loss, acc = exe.run(trainer_prog,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost] + accuracy.metrics)
            pass_acc = accuracy.eval(exe)
            if batch_id % 100 == 0:
                print("batch_id %d, loss: %f, acc: %f" %
                      (batch_id, loss, pass_acc))
            batch_id += 1

        pass_acc = accuracy.eval(exe)
        print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
else:
    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
@@ -1,217 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import os
import paddle.v2 as paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
import paddle.fluid.nets as nets
from paddle.fluid.optimizer import SGDOptimizer

IS_SPARSE = True
BATCH_SIZE = 256
PASS_NUM = 100


def get_usr_combined_features():
    USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
    uid = layers.data(name='user_id', shape=[1], dtype='int64')
    usr_emb = layers.embedding(
        input=uid,
        dtype='float32',
        size=[USR_DICT_SIZE, 32],
        param_attr='user_table',
        is_sparse=IS_SPARSE)
    usr_fc = layers.fc(input=usr_emb, size=32)
    USR_GENDER_DICT_SIZE = 2

    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
    usr_gender_emb = layers.embedding(
        input=usr_gender_id,
        size=[USR_GENDER_DICT_SIZE, 16],
        param_attr='gender_table',
        is_sparse=IS_SPARSE)
    usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)

    USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
    usr_age_emb = layers.embedding(
        input=usr_age_id,
        size=[USR_AGE_DICT_SIZE, 16],
        is_sparse=IS_SPARSE,
        param_attr='age_table')
    usr_age_fc = layers.fc(input=usr_age_emb, size=16)

    USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
    usr_job_emb = layers.embedding(
        input=usr_job_id,
        size=[USR_JOB_DICT_SIZE, 16],
        param_attr='job_table',
        is_sparse=IS_SPARSE)
    usr_job_fc = layers.fc(input=usr_job_emb, size=16)

    concat_embed = layers.concat(
        input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1)

    usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
    return usr_combined_features


def get_mov_combined_features():
    MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
    mov_emb = layers.embedding(
        input=mov_id,
        dtype='float32',
        size=[MOV_DICT_SIZE, 32],
        param_attr='movie_table',
        is_sparse=IS_SPARSE)
    mov_fc = layers.fc(input=mov_emb, size=32)

    CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
    category_id = layers.data(name='category_id', shape=[1], dtype='int64')
    mov_categories_emb = layers.embedding(
        input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
    mov_categories_hidden = layers.sequence_pool(
        input=mov_categories_emb, pool_type="sum")

    MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
    mov_title_id = layers.data(name='movie_title', shape=[1], dtype='int64')
    mov_title_emb = layers.embedding(
        input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32],
        is_sparse=IS_SPARSE)
    mov_title_conv = nets.sequence_conv_pool(
        input=mov_title_emb,
        num_filters=32,
        filter_size=3,
        act="tanh",
        pool_type="sum")

    concat_embed = layers.concat(
        input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)

    mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
    return mov_combined_features


def model():
    usr_combined_features = get_usr_combined_features()
    mov_combined_features = get_mov_combined_features()

    # need cos sim
    inference = layers.cos_sim(
        X=usr_combined_features, Y=mov_combined_features)
    scale_infer = layers.scale(x=inference, scale=5.0)

    label = layers.data(name='score', shape=[1], dtype='float32')
    square_cost = layers.square_error_cost(input=scale_infer, label=label)
    avg_cost = layers.mean(square_cost)

    return avg_cost


def func_feed(feeding, data, place):
    feed_tensors = {}
    for (key, idx) in feeding.iteritems():
        tensor = core.LoDTensor()
        if key != "category_id" and key != "movie_title":
            if key == "score":
                numpy_data = np.array(map(lambda x: x[idx], data)).astype(
                    "float32")
            else:
                numpy_data = np.array(map(lambda x: x[idx], data)).astype(
                    "int64")
        else:
            numpy_data = map(lambda x: np.array(x[idx]).astype("int64"), data)
            lod_info = [len(item) for item in numpy_data]
            offset = 0
            lod = [offset]
            for item in lod_info:
                offset += item
                lod.append(offset)
            numpy_data = np.concatenate(numpy_data, axis=0)
            tensor.set_lod([lod])

        numpy_data = numpy_data.reshape([numpy_data.shape[0], 1])
        tensor.set(numpy_data, place)
        feed_tensors[key] = tensor
    return feed_tensors


def main():
    cost = model()
    optimizer = SGDOptimizer(learning_rate=0.2)
    optimize_ops, params_grads = optimizer.minimize(cost)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.movielens.train(), buf_size=8192),
        batch_size=BATCH_SIZE)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    t = fluid.DistributeTranspiler()

    # list of all parameter server endpoints, used for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for the current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv("TRAINING_ROLE", "TRAINER")

    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        exe.run(fluid.default_startup_program())
        trainer_prog = t.get_trainer_program()

        feeding = {
            'user_id': 0,
            'gender_id': 1,
            'age_id': 2,
            'job_id': 3,
            'movie_id': 4,
            'category_id': 5,
            'movie_title': 6,
            'score': 7
        }

        for pass_id in range(PASS_NUM):
            for data in train_reader():
                outs = exe.run(trainer_prog,
                               feed=func_feed(feeding, data, place),
                               fetch_list=[cost])
                out = np.array(outs[0])
                print("cost=" + str(out[0]))
                if out[0] < 6.0:
                    print("Training complete. Average cost is less than 6.0.")
                    # if avg cost is less than 6.0, we consider the code correct
                    exit(0)
    else:
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")


if __name__ == '__main__':
    main()
@@ -1,126 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import os
import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid


def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32,
                    hid_dim=32):
    emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim])
    conv_3 = fluid.nets.sequence_conv_pool(
        input=emb,
        num_filters=hid_dim,
        filter_size=3,
        act="tanh",
        pool_type="sqrt")
    conv_4 = fluid.nets.sequence_conv_pool(
        input=emb,
        num_filters=hid_dim,
        filter_size=4,
        act="tanh",
        pool_type="sqrt")
    prediction = fluid.layers.fc(input=[conv_3, conv_4],
                                 size=class_dim,
                                 act="softmax")
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(cost)
    adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002)
    optimize_ops, params_grads = adam_optimizer.minimize(avg_cost)
    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
    return avg_cost, accuracy, accuracy.metrics[0], optimize_ops, params_grads


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    BATCH_SIZE = 100
    PASS_NUM = 5

    word_dict = paddle.dataset.imdb.word_dict()
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    cost, accuracy, acc_out, optimize_ops, params_grads = convolution_net(
        data, label, input_dim=dict_dim, class_dim=class_dim)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    t = fluid.DistributeTranspiler()

    # list of all parameter server endpoints, used for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for the current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver

    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        exe.run(fluid.default_startup_program())
        trainer_prog = t.get_trainer_program()
        feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

        for pass_id in xrange(PASS_NUM):
            accuracy.reset(exe)
            for data in train_data():
                cost_val, acc_val = exe.run(trainer_prog,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                pass_acc = accuracy.eval(exe)
                print("cost=" + str(cost_val) + " acc=" + str(acc_val) +
                      " pass_acc=" + str(pass_acc))
                if cost_val < 1.0 and pass_acc > 0.8:
                    exit(0)
    else:
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")


if __name__ == '__main__':
    main()
@@ -1,136 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import os
import paddle.v2 as paddle
import paddle.fluid as fluid


def stacked_lstm_net(data,
                     label,
                     input_dim,
                     class_dim=2,
                     emb_dim=128,
                     hid_dim=512,
                     stacked_num=3):
    assert stacked_num % 2 == 1

    emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim])
    # add bias attr

    # TODO(qijun) linear act
    fc1 = fluid.layers.fc(input=emb, size=hid_dim)
    lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)

    inputs = [fc1, lstm1]

    for i in range(2, stacked_num + 1):
        fc = fluid.layers.fc(input=inputs, size=hid_dim)
        lstm, cell = fluid.layers.dynamic_lstm(
            input=fc, size=hid_dim, is_reverse=(i % 2) == 0)
        inputs = [fc, lstm]

    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
    lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')

    prediction = fluid.layers.fc(input=[fc_last, lstm_last],
                                 size=class_dim,
                                 act='softmax')
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(cost)
    adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002)
    optimize_ops, params_grads = adam_optimizer.minimize(avg_cost)
    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
    return avg_cost, accuracy, accuracy.metrics[0], optimize_ops, params_grads


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    BATCH_SIZE = 100
    PASS_NUM = 5

    word_dict = paddle.dataset.imdb.word_dict()
    print("loaded word dict successfully")
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    cost, accuracy, acc_out, optimize_ops, params_grads = stacked_lstm_net(
        data, label, input_dim=dict_dim, class_dim=class_dim)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    t = fluid.DistributeTranspiler()
    # list of all parameter server endpoints, used for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for the current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver

    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        exe.run(fluid.default_startup_program())
        trainer_prog = t.get_trainer_program()
        for pass_id in xrange(PASS_NUM):
            accuracy.reset(exe)
            for data in train_data():
                cost_val, acc_val = exe.run(trainer_prog,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                pass_acc = accuracy.eval(exe)
                print("cost=" + str(cost_val) + " acc=" + str(acc_val) +
                      " pass_acc=" + str(pass_acc))
                if cost_val < 1.0 and acc_val > 0.8:
                    exit(0)
    else:
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")


if __name__ == '__main__':
    main()
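A note on the stacked LSTM above: is_reverse=(i % 2) == 0 alternates direction by layer index, and the `assert stacked_num % 2 == 1` guarantees the loop's last layer (i = stacked_num, odd) runs forward. The parity schedule in isolation (a toy trace, mirroring the loop above):

stacked_num = 3  # must be odd, per the assert in stacked_lstm_net
print("layer 1: forward")  # the first fc + lstm pair before the loop
for i in range(2, stacked_num + 1):
    print("layer %d: %s" % (i, "reverse" if i % 2 == 0 else "forward"))
# layer 2: reverse
# layer 3: forward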