commit
bdb21f6bc3
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,180 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
import tensorflow as tf
|
||||
import paddle.v2 as paddle
|
||||
|
||||
DTYPE = tf.float32
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the MNIST benchmark.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``, carrying
        ``batch_size``, ``iterations``, ``pass_num`` and ``device``.
    """
    parser = argparse.ArgumentParser("mnist model benchmark.")

    # The integer options only differ in flag, default and help text, so
    # they are registered from a small table: (flag, default, help).
    int_options = (
        ('--batch_size', 128, 'The minibatch size.'),
        ('--iterations', 35, 'The number of minibatches.'),
        ('--pass_num', 5, 'The number of passes.'),
    )
    for flag, default, help_text in int_options:
        parser.add_argument(flag, type=int, default=default, help=help_text)

    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')

    return parser.parse_args()
|
||||
|
||||
|
||||
def run_benchmark(args):
    """Build, train and evaluate a small CNN on MNIST with TensorFlow.

    The graph is two conv -> relu -> max-pool stages followed by a single
    fully connected layer with 10 outputs, optimised with Adam.  For each of
    ``args.pass_num`` passes over the Paddle MNIST training reader the
    function prints per-batch loss/error and, at the end of the pass, the
    streaming training accuracy and the accuracy on the MNIST test reader.

    Args:
        args: parsed command-line namespace.  ``device``, ``batch_size`` and
            ``pass_num`` are read here.
            NOTE(review): ``args.iterations`` is accepted by the CLI but is
            never consulted in this function.
    """

    def weight_variable(dtype, shape):
        """Return a variable initialised from a truncated normal (std 0.1)."""
        initial = tf.truncated_normal(shape, stddev=0.1, dtype=dtype)
        return tf.Variable(initial)

    def bias_variable(dtype, shape):
        """Return a variable initialised to the constant 0.1."""
        initial = tf.constant(0.1, shape=shape, dtype=dtype)
        return tf.Variable(initial)

    device = '/cpu:0' if args.device == 'CPU' else '/device:GPU:0'
    with tf.device(device):
        images = tf.placeholder(DTYPE, shape=(None, 28, 28, 1))
        labels = tf.placeholder(tf.int64, shape=(None, ))

        # conv1, relu, pool1
        conv1_weights = weight_variable(DTYPE, [5, 5, 1, 20])
        conv1_bias = bias_variable(DTYPE, [20])
        conv1 = tf.nn.conv2d(
            images, conv1_weights, strides=[1, 1, 1, 1], padding="VALID")
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))
        pool1 = tf.nn.max_pool(
            relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")

        # conv2, relu, pool2
        conv2_weights = weight_variable(DTYPE, [5, 5, 20, 50])
        conv2_bias = bias_variable(DTYPE, [50])
        conv2 = tf.nn.conv2d(
            pool1, conv2_weights, strides=[1, 1, 1, 1], padding="VALID")
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))
        pool2 = tf.nn.max_pool(
            relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")

        # FC: flatten everything except the batch dimension.  A plain loop
        # replaces ``reduce``, which is not a builtin on Python 3.
        pool_shape = pool2.get_shape().as_list()
        hidden_dim = 1
        for dim in pool_shape[1:]:
            hidden_dim *= dim
        reshape = tf.reshape(pool2, shape=(tf.shape(pool2)[0], hidden_dim))
        fc_weights = weight_variable(DTYPE, [hidden_dim, 10])
        fc_bias = bias_variable(DTYPE, [10])
        logits = tf.matmul(reshape, fc_weights) + fc_bias

        # Get prediction
        prediction = tf.nn.softmax(logits)

        # Loss: cross entropy written out by hand on the softmax output.
        one_hot_labels = tf.one_hot(labels, depth=10)
        cost = -tf.reduce_sum(tf.log(prediction) * one_hot_labels, [1])
        avg_cost = tf.reduce_mean(cost)

        # Get accuracy (per batch)
        correct = tf.equal(tf.argmax(prediction, 1), labels)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        # Streaming accuracy: g_accuracy is a (value, update_op) pair whose
        # local counter variables live in a dedicated scope so that they can
        # be re-initialised between passes.
        with tf.variable_scope("reset_metrics_accuracy_scope") as scope:
            g_accuracy = tf.metrics.accuracy(
                labels, tf.argmax(
                    prediction, axis=1))
        # Named ``metric_vars`` so the builtin ``vars`` is not shadowed.
        metric_vars = tf.contrib.framework.get_variables(
            scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
        g_accuracy_reset_op = tf.variables_initializer(metric_vars)

        # Optimizer
        opt = tf.train.AdamOptimizer(
            learning_rate=0.001, beta1=0.9, beta2=0.999)
        train_op = opt.minimize(avg_cost)

    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=args.batch_size)

    def make_feed(data):
        """Convert one reader batch into a feed dict for the placeholders.

        Each sample's image (``sample[0]``) is reshaped to 1x28x28 and then
        transposed to 28x28x1; ``sample[1]`` is the label.  List
        comprehensions are used instead of ``map`` because ``np.array`` over
        a Python 3 ``map`` iterator does not build the batch array.
        """
        images_data = np.array([
            np.transpose(sample[0].reshape([1, 28, 28]), axes=[1, 2, 0])
            for sample in data
        ]).astype("float32")
        labels_data = np.array([sample[1] for sample in data]).astype("int64")
        return {images: images_data, labels: labels_data}

    def eval_test():
        """Return the streaming accuracy over the whole test reader."""
        sess.run(g_accuracy_reset_op)
        for data in test_reader():
            # Only the metric's update op is needed to accumulate counts.
            sess.run(g_accuracy[1], feed_dict=make_feed(data))
        # Read the accumulated value once at the end; this also works when
        # the reader produced no batches.
        return sess.run(g_accuracy[0])

    config = tf.ConfigProto(
        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        init_g = tf.global_variables_initializer()
        init_l = tf.local_variables_initializer()
        sess.run(init_g)
        sess.run(init_l)
        for pass_id in range(args.pass_num):
            # Start every pass with fresh streaming-accuracy counters.
            sess.run(g_accuracy_reset_op)

            pass_start = time.time()
            for batch_id, data in enumerate(train_reader()):
                feed = make_feed(data)

                # Only the session call is timed, not the batch conversion.
                start = time.time()
                _, loss, acc, g_acc = sess.run(
                    [train_op, avg_cost, accuracy, g_accuracy],
                    feed_dict=feed)
                end = time.time()

                # NOTE(review): time.time() differences are in seconds, so
                # dividing by 1000 looks unintended; kept to preserve the
                # existing output format - confirm the desired unit.
                print("pass=%d, batch=%d, loss=%f, error=%f, elapse=%f" %
                      (pass_id, batch_id, loss, 1 - acc, (end - start) / 1000))

            pass_end = time.time()
            # Capture the training metric before eval_test() resets it.
            train_avg_acc = sess.run(g_accuracy[0])
            test_avg_acc = eval_test()

            print(
                "pass=%d, training_avg_accuracy=%f, test_avg_acc=%f, elapse=%f"
                % (pass_id, train_avg_acc, test_avg_acc,
                   (pass_end - pass_start) / 1000))
|
||||
|
||||
|
||||
def print_arguments(args):
    """Print every attribute of ``args`` as ``name: value``, sorted by name.

    Args:
        args: an ``argparse.Namespace`` (any object accepted by ``vars``).
    """
    print('----------- Configuration Arguments -----------')
    # ``items()`` exists on both Python 2 and Python 3 dictionaries, whereas
    # ``iteritems()`` raises AttributeError on Python 3.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: read the CLI flags, echo them for the log, then
    # run the benchmark with them.
    args = parse_args()
    print_arguments(args)
    run_benchmark(args)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,220 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import argparse
|
||||
import time
|
||||
import tensorflow as tf
|
||||
|
||||
import paddle.v2 as paddle
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the LSTM benchmark.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``, carrying
        ``batch_size``, ``stacked_num``, ``embedding_dim``, ``hidden_dim``,
        ``pass_num``, ``learning_rate`` and ``infer_only``.
    """
    parser = argparse.ArgumentParser("LSTM model benchmark.")
    add_option = parser.add_argument

    # Integer-valued model and schedule options.
    add_option(
        '--batch_size',
        type=int,
        default=32,
        help='The sequence number of a batch data. (default: %(default)d)')
    add_option(
        '--stacked_num',
        type=int,
        default=5,
        help='Number of lstm layers to stack. (default: %(default)d)')
    add_option(
        '--embedding_dim',
        type=int,
        default=512,
        help='Dimension of embedding table. (default: %(default)d)')
    add_option(
        '--hidden_dim',
        type=int,
        default=512,
        help='Hidden size of lstm unit. (default: %(default)d)')
    add_option(
        '--pass_num',
        type=int,
        default=10,
        help='Epoch number to train. (default: %(default)d)')

    # Optimiser step size and the forward-only switch.
    add_option(
        '--learning_rate',
        type=float,
        default=0.0002,
        help='Learning rate used to train. (default: %(default)f)')
    add_option(
        '--infer_only', action='store_true', help='If set, run forward only.')

    return parser.parse_args()
|
||||
|
||||
|
||||
def print_arguments(args):
    """Print every attribute of ``args`` as ``name: value``, sorted by name.

    Args:
        args: an ``argparse.Namespace`` (any object accepted by ``vars``).
    """
    print('----------- Configuration Arguments -----------')
    # ``items()`` exists on both Python 2 and Python 3 dictionaries, whereas
    # ``iteritems()`` raises AttributeError on Python 3.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
|
||||
|
||||
|
||||
def dynamic_lstm_model(dict_size,
                       embedding_dim,
                       hidden_dim,
                       stacked_num,
                       class_num=2,
                       is_train=True):
    """Build a stacked-LSTM sequence classifier graph.

    Word ids are embedded, run through ``stacked_num`` LSTM layers with
    ``tf.nn.dynamic_rnn``, and the ``h`` state of the last layer is projected
    to ``class_num`` logits.

    Args:
        dict_size: number of rows of the embedding table.
        embedding_dim: number of columns of the embedding table.
        hidden_dim: ``num_units`` of every LSTM cell.
        stacked_num: number of LSTM layers to stack.
        class_num: number of output classes.
        is_train: when false, only the inference part of the graph is built.

    Returns:
        If ``is_train`` is false: ``((word_idx, sequence_length), softmax)``.
        Otherwise ``((word_idx, sequence_length, label), avg_loss, acc,
        g_acc, reset_op)`` where ``g_acc`` is the pair returned by
        ``tf.metrics.accuracy`` and ``reset_op`` re-initialises its local
        variables.
    """
    word_idx = tf.placeholder(tf.int64, shape=[None, None])
    sequence_length = tf.placeholder(tf.int64, shape=[None, ])

    embedding_weights = tf.get_variable('word_embeddings',
                                        [dict_size, embedding_dim])
    embedding = tf.nn.embedding_lookup(embedding_weights, word_idx)

    # Build one cell object per layer.  Repeating a single cell object
    # (``[cell] * n``) hands the very same cell to every layer instead of
    # giving each layer its own parameters.
    lstm_cells = [
        tf.nn.rnn_cell.LSTMCell(num_units=hidden_dim, use_peepholes=False)
        for _ in range(stacked_num)
    ]
    stacked_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)

    # final_state [LSTMTuple(c, h), LSTMTuple(c, h) ...] total stacked_num LSTMTuples
    _, final_state = tf.nn.dynamic_rnn(
        cell=stacked_cell,
        inputs=embedding,
        dtype=tf.float32,
        sequence_length=sequence_length)

    w = tf.Variable(
        tf.truncated_normal([hidden_dim, class_num]), dtype=tf.float32)
    bias = tf.Variable(
        tf.constant(
            value=0.0, shape=[class_num], dtype=tf.float32))
    # final_state[-1][1] is the ``h`` entry of the top layer's state tuple.
    prediction = tf.matmul(final_state[-1][1], w) + bias

    if not is_train:
        return (word_idx, sequence_length), tf.nn.softmax(prediction)

    label = tf.placeholder(tf.int64, shape=[None, ])
    # The one-hot depth must follow class_num so that it matches the logits
    # width for any number of classes, not only the default of 2.
    loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(label, class_num), logits=prediction)
    avg_loss = tf.reduce_mean(loss)

    correct_count = tf.equal(tf.argmax(prediction, 1), label)
    acc = tf.reduce_mean(tf.cast(correct_count, tf.float32))

    # The streaming metric's local variables live in their own scope so they
    # can be collected and re-initialised through ``reset_op``.
    with tf.variable_scope("reset_metrics_accuracy_scope") as scope:
        g_acc = tf.metrics.accuracy(label, tf.argmax(prediction, axis=1))
    # Named ``metric_vars`` so the builtin ``vars`` is not shadowed.
    metric_vars = tf.contrib.framework.get_variables(
        scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
    reset_op = tf.variables_initializer(metric_vars)

    return (word_idx, sequence_length, label), avg_loss, acc, g_acc, reset_op
|
||||
|
||||
|
||||
def padding_data(data, padding_size, value):
    """Return a copy of ``data`` filled with ``value`` and cut to ``padding_size``.

    Args:
        data: list to pad; it is not modified.
        padding_size: length the result is sliced to.
        value: filler element appended after the original items.

    Returns:
        A new list: ``data`` followed by ``padding_size`` fillers, sliced to
        its first ``padding_size`` entries.
    """
    filler = [value] * padding_size
    padded = data + filler
    return padded[:padding_size]
|
||||
|
||||
|
||||
def train(args):
    """Train the stacked-LSTM classifier on IMDB and report throughput.

    Builds the model with ``dynamic_lstm_model``, optimises ``avg_loss`` with
    Adam, and for each of ``args.pass_num`` passes prints per-batch
    loss/accuracy followed by the test accuracy, words per second and
    seconds per pass.

    Args:
        args: parsed command-line namespace; ``embedding_dim``,
            ``hidden_dim``, ``stacked_num``, ``learning_rate``,
            ``batch_size`` and ``pass_num`` are read here.
    """
    word_dict = paddle.dataset.imdb.word_dict()
    dict_size = len(word_dict)

    feeding_list, avg_loss, acc, g_acc, reset_op = dynamic_lstm_model(
        dict_size, args.embedding_dim, args.hidden_dim, args.stacked_num)

    adam_optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    train_op = adam_optimizer.minimize(avg_loss)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=25000),
        batch_size=args.batch_size)

    test_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.test(word_dict), buf_size=25000),
        batch_size=args.batch_size)

    def make_feed(data):
        """Turn one reader batch into ``(feed_dict, word_count)``.

        ``sample[0]`` is the word-id sequence and ``sample[1]`` the label.
        Sequences are padded with 0 up to the longest one in the batch.
        Lists are materialised explicitly because the sequences are walked
        twice, which a Python 3 ``map`` iterator would not allow.
        """
        sequences = [sample[0] for sample in data]
        sequence_length = np.array(
            [len(seq) for seq in sequences]).astype('int64')
        maxlen = int(np.max(sequence_length))
        word_idx = np.array(
            [padding_data(seq, maxlen, 0) for seq in sequences]).astype(
                'int64')
        label = np.array([sample[1] for sample in data]).astype('int64')
        feed = {
            feeding_list[0]: word_idx,
            feeding_list[1]: sequence_length,
            feeding_list[2]: label
        }
        return feed, np.sum(sequence_length)

    def do_validation(sess):
        """Return the streaming accuracy over the whole test reader."""
        sess.run(reset_op)
        for data in test_reader():
            feed, _ = make_feed(data)
            # Only the metric update op runs here.  train_op is deliberately
            # NOT fetched: evaluating must not update the weights on the
            # test set.
            sess.run(g_acc[1], feed_dict=feed)
        # Read the accumulated value once; also valid for an empty reader.
        return sess.run(g_acc[0])

    config = tf.ConfigProto(
        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        init_g = tf.global_variables_initializer()
        init_l = tf.local_variables_initializer()
        sess.run(init_l)
        sess.run(init_g)

        # ``range`` works on both Python 2 and 3 (``xrange`` is 2-only).
        for pass_id in range(args.pass_num):
            # clear accuracy local variable
            sess.run(reset_op)
            pass_start_time = time.time()
            words_seen = 0

            for batch_id, data in enumerate(train_reader()):
                feed, word_count = make_feed(data)
                words_seen += word_count

                _, loss, fetch_acc, fetch_g_acc = sess.run(
                    [train_op, avg_loss, acc, g_acc], feed_dict=feed)

                print("pass_id=%d, batch_id=%d, loss: %f, acc: %f, avg_acc: %f"
                      % (pass_id, batch_id, loss, fetch_acc, fetch_g_acc[1]))

            pass_end_time = time.time()
            time_consumed = pass_end_time - pass_start_time
            words_per_sec = words_seen / time_consumed
            test_acc = do_validation(sess)
            print("pass_id=%d, test_acc: %f, words/s: %f, sec/pass: %f" %
                  (pass_id, test_acc, words_per_sec, time_consumed))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: read the CLI flags and echo them for the log.
    args = parse_args()
    print_arguments(args)

    # With --infer_only nothing is run; training is the only mode wired up.
    if not args.infer_only:
        train(args)
|
File diff suppressed because it is too large
Load Diff
@ -1,20 +1,15 @@
|
||||
add_subdirectory(detail)
|
||||
|
||||
cc_library(memory SRCS memory.cc DEPS place enforce)
|
||||
cc_library(malloc SRCS malloc.cc DEPS buddy_allocator place enforce)
|
||||
cc_library(memcpy SRCS memcpy.cc DEPS place)
|
||||
|
||||
cc_library(paddle_memory
|
||||
cc_library(memory
|
||||
DEPS
|
||||
memory
|
||||
memcpy
|
||||
meta_data
|
||||
meta_cache
|
||||
memory_block
|
||||
buddy_allocator
|
||||
system_allocator)
|
||||
malloc
|
||||
memcpy)
|
||||
|
||||
cc_test(memory_test SRCS memory_test.cc DEPS place paddle_memory)
|
||||
cc_test(malloc_test SRCS malloc_test.cc DEPS malloc)
|
||||
|
||||
#if (WITH_GPU)
|
||||
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place paddle_memory)
|
||||
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
|
||||
#endif()
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue