commit
bdb21f6bc3
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,180 @@
|
|||||||
|
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import tensorflow as tf
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
|
||||||
|
DTYPE = tf.float32
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the command-line options of the MNIST benchmark.

    Returns:
        argparse.Namespace: carries ``batch_size``, ``iterations``,
        ``pass_num`` and ``device``.
    """
    # The text must be given as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally
    # replaces the program name in the usage line.
    parser = argparse.ArgumentParser(description="mnist model benchmark.")
    parser.add_argument(
        '--batch_size', type=int, default=128, help='The minibatch size.')
    parser.add_argument(
        '--iterations', type=int, default=35, help='The number of minibatches.')
    parser.add_argument(
        '--pass_num', type=int, default=5, help='The number of passes.')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    args = parser.parse_args()
    return args
|
||||||
|
|
||||||
|
|
||||||
|
def run_benchmark(args):
    """Build a small CNN in TensorFlow and train/evaluate it on MNIST.

    The graph is two conv + relu + max-pool stages followed by a single
    fully connected layer with 10 outputs, optimized with Adam. After each
    pass over the training reader the test reader is evaluated and both
    accuracies are printed.

    Args:
        args: parsed options; ``device``, ``batch_size`` and ``pass_num``
            are read here.
    """

    def weight_variable(dtype, shape):
        """Return a variable initialized from a truncated normal."""
        initial = tf.truncated_normal(shape, stddev=0.1, dtype=dtype)
        return tf.Variable(initial)

    def bias_variable(dtype, shape):
        """Return a variable initialized to the constant 0.1."""
        initial = tf.constant(0.1, shape=shape, dtype=dtype)
        return tf.Variable(initial)

    device = '/cpu:0' if args.device == 'CPU' else '/device:GPU:0'
    with tf.device(device):
        images = tf.placeholder(DTYPE, shape=(None, 28, 28, 1))
        labels = tf.placeholder(tf.int64, shape=(None, ))

        # conv1, relu, pool1
        conv1_weights = weight_variable(DTYPE, [5, 5, 1, 20])
        conv1_bias = bias_variable(DTYPE, [20])
        conv1 = tf.nn.conv2d(
            images, conv1_weights, strides=[1, 1, 1, 1], padding="VALID")
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))
        pool1 = tf.nn.max_pool(
            relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")

        # conv2, relu, pool2
        conv2_weights = weight_variable(DTYPE, [5, 5, 20, 50])
        conv2_bias = bias_variable(DTYPE, [50])
        conv2 = tf.nn.conv2d(
            pool1, conv2_weights, strides=[1, 1, 1, 1], padding="VALID")
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))
        pool2 = tf.nn.max_pool(
            relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")

        # FC
        pool_shape = pool2.get_shape().as_list()
        # Product of every non-batch dimension. np.prod replaces the bare
        # ``reduce`` call, which is not a builtin on Python 3.
        hidden_dim = int(np.prod(pool_shape[1:]))
        reshape = tf.reshape(pool2, shape=(tf.shape(pool2)[0], hidden_dim))
        fc_weights = weight_variable(DTYPE, [hidden_dim, 10])
        fc_bias = bias_variable(DTYPE, [10])
        logits = tf.matmul(reshape, fc_weights) + fc_bias

        # Get prediction
        prediction = tf.nn.softmax(logits)

        # Loss
        one_hot_labels = tf.one_hot(labels, depth=10)
        cost = -tf.reduce_sum(tf.log(prediction) * one_hot_labels, [1])
        avg_cost = tf.reduce_mean(cost)

        # Get accuracy
        correct = tf.equal(tf.argmax(prediction, 1), labels)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        # metrics, g_accuracy: a streaming accuracy whose local variables
        # live in their own scope so they can be re-initialized on demand.
        with tf.variable_scope("reset_metrics_accuracy_scope") as scope:
            g_accuracy = tf.metrics.accuracy(
                labels, tf.argmax(
                    prediction, axis=1))
            # Named ``metric_vars`` so the ``vars`` builtin is not shadowed.
            metric_vars = tf.contrib.framework.get_variables(
                scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
            g_accuracy_reset_op = tf.variables_initializer(metric_vars)

        # Optimizer
        opt = tf.train.AdamOptimizer(
            learning_rate=0.001, beta1=0.9, beta2=0.999)
        train_op = opt.minimize(avg_cost)

    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=args.batch_size)

    def make_feed(batch):
        """Turn one reader batch into the feed_dict for the placeholders.

        Each sample's first field is reshaped to [1, 28, 28] and transposed
        to [28, 28, 1]; its second field is used as the label. List
        comprehensions are used because ``map`` returns an iterator on
        Python 3, which ``np.array`` would wrap as a 0-d object array.
        """
        images_data = np.array([
            np.transpose(sample[0].reshape([1, 28, 28]), axes=[1, 2, 0])
            for sample in batch
        ]).astype("float32")
        labels_data = np.array(
            [sample[1] for sample in batch]).astype("int64")
        return {images: images_data, labels: labels_data}

    def eval_test():
        """Return the streaming accuracy over the whole test reader."""
        sess.run(g_accuracy_reset_op)
        for data in test_reader():
            _, _, g_acc = sess.run(
                [avg_cost, accuracy, g_accuracy], feed_dict=make_feed(data))
        # Second element is the value returned by the metric's update op.
        return g_acc[1]

    config = tf.ConfigProto(
        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        init_g = tf.global_variables_initializer()
        init_l = tf.local_variables_initializer()
        sess.run(init_g)
        sess.run(init_l)
        for pass_id in range(args.pass_num):
            # Restart the streaming accuracy for this pass.
            sess.run(g_accuracy_reset_op)

            pass_start = time.time()
            for batch_id, data in enumerate(train_reader()):
                feed = make_feed(data)

                # Only the session call is timed, not the data conversion.
                start = time.time()
                _, loss, acc, g_acc = sess.run(
                    [train_op, avg_cost, accuracy, g_accuracy],
                    feed_dict=feed)
                end = time.time()

                # time.time() differences are already in seconds; the
                # previous "/ 1000" produced a meaningless unit.
                print("pass=%d, batch=%d, loss=%f, error=%f, elapse=%f" %
                      (pass_id, batch_id, loss, 1 - acc, end - start))

            pass_end = time.time()
            test_avg_acc = eval_test()

            print(
                "pass=%d, training_avg_accuracy=%f, test_avg_acc=%f, elapse=%f"
                % (pass_id, g_acc[1], test_avg_acc, pass_end - pass_start))
|
||||||
|
|
||||||
|
|
||||||
|
def print_arguments(args):
    """Print every parsed option as ``name: value``, sorted by name.

    Args:
        args: an object whose attributes are the options, e.g. an
            argparse.Namespace.
    """
    print('----------- Configuration Arguments -----------')
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is
    # Python 2 only and raises AttributeError on Python 3.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: read the command-line options, echo them, then
    # run the benchmark with those settings.
    args = parse_args()
    print_arguments(args)
    run_benchmark(args)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,220 @@
|
|||||||
|
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the command-line options of the LSTM benchmark.

    Returns:
        argparse.Namespace: carries ``batch_size``, ``stacked_num``,
        ``embedding_dim``, ``hidden_dim``, ``pass_num``, ``learning_rate``
        and ``infer_only``.
    """
    # The text must be given as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally
    # replaces the program name in the usage line.
    parser = argparse.ArgumentParser(description="LSTM model benchmark.")
    parser.add_argument(
        '--batch_size',
        type=int,
        default=32,
        help='The sequence number of a batch data. (default: %(default)d)')
    parser.add_argument(
        '--stacked_num',
        type=int,
        default=5,
        help='Number of lstm layers to stack. (default: %(default)d)')
    parser.add_argument(
        '--embedding_dim',
        type=int,
        default=512,
        help='Dimension of embedding table. (default: %(default)d)')
    parser.add_argument(
        '--hidden_dim',
        type=int,
        default=512,
        help='Hidden size of lstm unit. (default: %(default)d)')
    parser.add_argument(
        '--pass_num',
        type=int,
        default=10,
        help='Epoch number to train. (default: %(default)d)')
    parser.add_argument(
        '--learning_rate',
        type=float,
        default=0.0002,
        help='Learning rate used to train. (default: %(default)f)')
    parser.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')
    args = parser.parse_args()
    return args
|
||||||
|
|
||||||
|
|
||||||
|
def print_arguments(args):
    """Print every parsed option as ``name: value``, sorted by name.

    Args:
        args: an object whose attributes are the options, e.g. an
            argparse.Namespace.
    """
    print('----------- Configuration Arguments -----------')
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is
    # Python 2 only and raises AttributeError on Python 3.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
|
||||||
|
|
||||||
|
|
||||||
|
def dynamic_lstm_model(dict_size,
                       embedding_dim,
                       hidden_dim,
                       stacked_num,
                       class_num=2,
                       is_train=True):
    """Build a stacked-LSTM classifier graph over embedded word indices.

    Args:
        dict_size: number of rows of the embedding table.
        embedding_dim: number of columns of the embedding table.
        hidden_dim: ``num_units`` of every LSTM cell.
        stacked_num: number of LSTM cells stacked in the MultiRNNCell.
        class_num: number of output classes.
        is_train: when False, only the forward graph is built.

    Returns:
        When ``is_train`` is False:
            ``((word_idx, sequence_length), softmax_prediction)``.
        Otherwise:
            ``((word_idx, sequence_length, label), avg_loss, acc, g_acc,
            reset_op)`` where ``g_acc`` is the pair returned by
            ``tf.metrics.accuracy`` and ``reset_op`` re-initializes that
            metric's local variables.
    """
    word_idx = tf.placeholder(tf.int64, shape=[None, None])
    sequence_length = tf.placeholder(tf.int64, shape=[None, ])

    embedding_weights = tf.get_variable('word_embeddings',
                                        [dict_size, embedding_dim])
    embedding = tf.nn.embedding_lookup(embedding_weights, word_idx)

    # One distinct cell object per layer. ``[cell] * stacked_num`` would put
    # the very same object at every position of the stack.
    lstm_cells = [
        tf.nn.rnn_cell.LSTMCell(
            num_units=hidden_dim, use_peepholes=False)
        for _ in range(stacked_num)
    ]
    stacked_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)

    # final_state [LSTMTuple(c, h), LSTMTuple(c, h) ...] total stacked_num LSTMTuples
    _, final_state = tf.nn.dynamic_rnn(
        cell=stacked_cell,
        inputs=embedding,
        dtype=tf.float32,
        sequence_length=sequence_length)

    w = tf.Variable(
        tf.truncated_normal([hidden_dim, class_num]), dtype=tf.float32)
    bias = tf.Variable(
        tf.constant(
            value=0.0, shape=[class_num], dtype=tf.float32))
    # Classify from the h state of the last layer in the stack.
    prediction = tf.matmul(final_state[-1][1], w) + bias

    if not is_train:
        return (word_idx, sequence_length), tf.nn.softmax(prediction)

    label = tf.placeholder(tf.int64, shape=[None, ])
    # The one-hot depth follows class_num so it always matches the width of
    # ``prediction`` (it used to be hard-coded to 2).
    loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(label, class_num), logits=prediction)
    avg_loss = tf.reduce_mean(loss)

    correct_count = tf.equal(tf.argmax(prediction, 1), label)
    acc = tf.reduce_mean(tf.cast(correct_count, tf.float32))

    # Streaming accuracy whose local variables live in their own scope so
    # they can be re-initialized on demand.
    with tf.variable_scope("reset_metrics_accuracy_scope") as scope:
        g_acc = tf.metrics.accuracy(label, tf.argmax(prediction, axis=1))
        # Named ``metric_vars`` so the ``vars`` builtin is not shadowed.
        metric_vars = tf.contrib.framework.get_variables(
            scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
        reset_op = tf.variables_initializer(metric_vars)

    return (word_idx, sequence_length, label), avg_loss, acc, g_acc, reset_op
|
||||||
|
|
||||||
|
|
||||||
|
def padding_data(data, padding_size, value):
    """Return a copy of ``data`` forced to length ``padding_size``.

    Shorter lists are right-padded with ``value``; longer lists are
    truncated. The input list is never modified.

    Args:
        data: list to pad or truncate.
        padding_size: length of the returned list.
        value: filler appended when ``data`` is too short.

    Returns:
        A new list of ``padding_size`` elements.
    """
    # Only the missing tail is allocated, instead of appending
    # ``padding_size`` fillers and slicing most of them away. A non-positive
    # count repeats the filler list zero times.
    missing = padding_size - len(data)
    return data[:padding_size] + [value] * missing
|
||||||
|
|
||||||
|
|
||||||
|
def train(args):
    """Train the stacked-LSTM model on IMDB and validate after every pass.

    Args:
        args: parsed options; ``embedding_dim``, ``hidden_dim``,
            ``stacked_num``, ``learning_rate``, ``batch_size`` and
            ``pass_num`` are read here.
    """
    word_dict = paddle.dataset.imdb.word_dict()
    dict_size = len(word_dict)

    feeding_list, avg_loss, acc, g_acc, reset_op = dynamic_lstm_model(
        dict_size, args.embedding_dim, args.hidden_dim, args.stacked_num)

    adam_optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    train_op = adam_optimizer.minimize(avg_loss)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=25000),
        batch_size=args.batch_size)

    test_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.test(word_dict), buf_size=25000),
        batch_size=args.batch_size)

    def make_feed(batch):
        """Turn one reader batch into ``(feed_dict, sequence_length)``.

        Sequences are right-padded with 0 up to the longest one in the
        batch. A real list is built first: on Python 3 ``map`` returns a
        one-shot iterator that would already be exhausted after the lengths
        are computed, leaving nothing to pad.
        """
        sequences = [sample[0] for sample in batch]
        sequence_length = np.array(
            [len(seq) for seq in sequences]).astype('int64')
        maxlen = np.max(sequence_length)
        word_idx = np.array(
            [padding_data(seq, maxlen, 0) for seq in sequences]).astype('int64')
        label = np.array([sample[1] for sample in batch]).astype('int64')
        feed = {
            feeding_list[0]: word_idx,
            feeding_list[1]: sequence_length,
            feeding_list[2]: label
        }
        return feed, sequence_length

    def do_validation(sess):
        """Return the streaming accuracy over the whole test reader."""
        sess.run(reset_op)
        for data in test_reader():
            feed, _ = make_feed(data)
            # Only the metric is fetched. ``train_op`` must not run here,
            # otherwise the model would be optimized on the test set.
            fetch_g_acc = sess.run(g_acc, feed_dict=feed)

        # Second element is the value returned by the metric's update op.
        return fetch_g_acc[1]

    config = tf.ConfigProto(
        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        init_g = tf.global_variables_initializer()
        init_l = tf.local_variables_initializer()
        sess.run(init_l)
        sess.run(init_g)

        # ``range`` works on Python 2 and 3; ``xrange`` is Python 2 only.
        for pass_id in range(args.pass_num):
            # clear accuracy local variable
            sess.run(reset_op)
            pass_start_time = time.time()
            words_seen = 0

            for batch_id, data in enumerate(train_reader()):
                feed, sequence_length = make_feed(data)
                words_seen += np.sum(sequence_length)

                _, loss, fetch_acc, fetch_g_acc = sess.run(
                    [train_op, avg_loss, acc, g_acc], feed_dict=feed)

                print("pass_id=%d, batch_id=%d, loss: %f, acc: %f, avg_acc: %f"
                      % (pass_id, batch_id, loss, fetch_acc, fetch_g_acc[1]))

            # Throughput is measured before validation so that the test
            # pass does not count towards the training time.
            pass_end_time = time.time()
            time_consumed = pass_end_time - pass_start_time
            words_per_sec = words_seen / time_consumed
            test_acc = do_validation(sess)
            print("pass_id=%d, test_acc: %f, words/s: %f, sec/pass: %f" %
                  (pass_id, test_acc, words_per_sec, time_consumed))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)

    # --infer_only is accepted but there is no forward-only path here: the
    # flag merely skips training.
    if not args.infer_only:
        train(args)
|
File diff suppressed because it is too large
Load Diff
@ -1,20 +1,15 @@
|
|||||||
add_subdirectory(detail)
|
add_subdirectory(detail)
|
||||||
|
|
||||||
cc_library(memory SRCS memory.cc DEPS place enforce)
|
cc_library(malloc SRCS malloc.cc DEPS buddy_allocator place enforce)
|
||||||
cc_library(memcpy SRCS memcpy.cc DEPS place)
|
cc_library(memcpy SRCS memcpy.cc DEPS place)
|
||||||
|
|
||||||
cc_library(paddle_memory
|
cc_library(memory
|
||||||
DEPS
|
DEPS
|
||||||
memory
|
malloc
|
||||||
memcpy
|
memcpy)
|
||||||
meta_data
|
|
||||||
meta_cache
|
|
||||||
memory_block
|
|
||||||
buddy_allocator
|
|
||||||
system_allocator)
|
|
||||||
|
|
||||||
cc_test(memory_test SRCS memory_test.cc DEPS place paddle_memory)
|
cc_test(malloc_test SRCS malloc_test.cc DEPS malloc)
|
||||||
|
|
||||||
#if (WITH_GPU)
|
#if (WITH_GPU)
|
||||||
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place paddle_memory)
|
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
|
||||||
#endif()
|
#endif()
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue