Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into mpi_enabled
commit
d2ba05a671
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,224 @@
|
|||||||
|
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
import paddle.fluid as fluid
|
||||||
|
import paddle.fluid.profiler as profiler
|
||||||
|
|
||||||
|
# Random seed constant; in the visible code it is only referenced by the
# commented-out startup-program line further down.
SEED = 1
|
||||||
|
# Element dtype of the image input layer and of the numpy batches fed to it.
DTYPE = "float32"
|
||||||
|
|
||||||
|
# The random seed must be set before configuring the network.
|
||||||
|
# fluid.default_startup_program().random_seed = SEED
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Define the MNIST benchmark command line and parse ``sys.argv``.

    Returns:
        argparse.Namespace with ``batch_size``, ``skip_batch_num``,
        ``iterations``, ``pass_num``, ``device``, ``infer_only``,
        ``use_cprof``, ``use_nvprof`` and ``with_test``.
    """
    # The first positional parameter of ArgumentParser is ``prog``. Passing
    # the banner as ``description`` keeps the real script name in the usage
    # line and shows the banner in --help instead.
    parser = argparse.ArgumentParser(description="mnist model benchmark.")
    parser.add_argument(
        '--batch_size', type=int, default=128, help='The minibatch size.')
    parser.add_argument(
        '--skip_batch_num',
        type=int,
        default=5,
        help='The first num of minibatch num to skip, for better performance test'
    )
    parser.add_argument(
        '--iterations', type=int, default=35, help='The number of minibatches.')
    parser.add_argument(
        '--pass_num', type=int, default=5, help='The number of passes.')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    parser.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')
    parser.add_argument(
        '--use_cprof', action='store_true', help='If set, use cProfile.')
    parser.add_argument(
        '--use_nvprof',
        action='store_true',
        help='If set, use nvprof for CUDA.')
    parser.add_argument(
        '--with_test',
        action='store_true',
        help='If set, test the testset during training.')
    args = parser.parse_args()
    return args
|
||||||
|
|
||||||
|
|
||||||
|
def cnn_model(data):
    """Build the benchmark CNN on top of the image layer ``data``.

    Two ``simple_img_conv_pool`` stages (5x5 filters, 20 then 50 filters,
    2x2 pooling with stride 2, relu) feed a 10-way softmax fully connected
    layer whose weights use a normal initializer.

    Args:
        data: the input image layer.

    Returns:
        The softmax prediction layer.
    """
    # ``reduce`` is a builtin only on Python 2; functools.reduce exists on
    # both Python 2 and 3.
    from functools import reduce

    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=data,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        act="relu")
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        act="relu")

    # TODO(dzhwinter) : refine the initializer and random seed setting
    SIZE = 10
    input_shape = conv_pool_2.shape
    # Flattened feature count (product of every dimension after the first)
    # paired with the number of output classes.
    param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
    # Standard deviation handed to the normal initializer of the FC weights.
    scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5

    predict = fluid.layers.fc(
        input=conv_pool_2,
        size=SIZE,
        act="softmax",
        param_attr=fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.NormalInitializer(
                loc=0.0, scale=scale)))
    return predict
|
||||||
|
|
||||||
|
|
||||||
|
def eval_test(exe, batch_acc, batch_size_tensor, inference_program,
              batch_size=None):
    """Run the inference program over the MNIST test set.

    Args:
        exe: executor used to run ``inference_program``.
        batch_acc: accuracy variable fetched for each batch.
        batch_size_tensor: variable fetched as the weight of each batch.
        inference_program: program to execute for evaluation.
        batch_size: test minibatch size. When omitted, the module-level
            ``args.batch_size`` (set by the ``__main__`` block) is used, as
            before.

    Returns:
        The weighted average accuracy over all test batches.
    """
    if batch_size is None:
        batch_size = args.batch_size
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=batch_size)
    test_pass_acc = fluid.average.WeightedAverage()
    for batch_id, data in enumerate(test_reader()):
        # List comprehensions instead of map(): on Python 3 np.array(map(...))
        # wraps the iterator object instead of stacking the samples.
        img_data = np.array(
            [x[0].reshape([1, 28, 28]) for x in data]).astype(DTYPE)
        y_data = np.array([x[1] for x in data]).astype("int64")
        y_data = y_data.reshape([len(y_data), 1])

        acc, weight = exe.run(inference_program,
                              feed={"pixel": img_data,
                                    "label": y_data},
                              fetch_list=[batch_acc, batch_size_tensor])
        test_pass_acc.add(value=acc, weight=weight)
    pass_acc = test_pass_acc.eval()
    return pass_acc
|
||||||
|
|
||||||
|
|
||||||
|
def run_benchmark(model, args):
    """Build the MNIST training program, train it and print throughput.

    Args:
        model: callable that takes the image data layer and returns the
            prediction layer (``cnn_model`` in this file).
        args: parsed command line; reads ``use_cprof``, ``device``,
            ``batch_size``, ``pass_num``, ``skip_batch_num``, ``iterations``
            and ``with_test``.

    The process exits with status 0 at the end of the first pass.
    """
    pr = None
    if args.use_cprof:
        # cProfile is not imported at file level, so --use_cprof used to
        # fail with a NameError; import it where it is needed.
        import cProfile
        pr = cProfile.Profile()
        pr.enable()

    # Input data
    images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    predict = model(images)
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # inference program: cloned before the optimizer ops are appended
    inference_program = fluid.default_main_program().clone()

    # Optimization
    opt = fluid.optimizer.AdamOptimizer(
        learning_rate=0.001, beta1=0.9, beta2=0.999)
    opt.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # Initialize executor
    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    # Parameter initialization
    exe.run(fluid.default_startup_program())

    # Reader
    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=args.batch_size)

    accuracy = fluid.metrics.Accuracy()
    iters, num_samples, start_time = 0, 0, time.time()
    for pass_id in range(args.pass_num):
        accuracy.reset()
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_reader()):
            if iters == args.skip_batch_num:
                # Warm-up finished: restart the clock and the sample counter.
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            # List comprehensions instead of map(): on Python 3
            # np.array(map(...)) wraps the iterator instead of stacking.
            img_data = np.array(
                [x[0].reshape([1, 28, 28]) for x in data]).astype(DTYPE)
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = y_data.reshape([len(y_data), 1])

            outs = exe.run(
                fluid.default_main_program(),
                feed={"pixel": img_data,
                      "label": y_data},
                fetch_list=[avg_cost, batch_acc, batch_size_tensor]
            )  # The accuracy is the accumulation of batches, but not the current batch.
            accuracy.update(value=outs[1], weight=outs[2])
            iters += 1
            num_samples += len(y_data)
            loss = np.array(outs[0])
            acc = np.array(outs[1])
            train_losses.append(loss)
            train_accs.append(acc)
            print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %
                  (pass_id, iters, loss, acc))

        print("Pass: %d, Loss: %f, Train Accuray: %f\n" %
              (pass_id, np.mean(train_losses), np.mean(train_accs)))
        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed

        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
              (num_samples, train_elapsed, examples_per_sec))
        # evaluation
        if args.with_test:
            # NOTE(review): the test accuracy is computed but never reported.
            test_avg_acc = eval_test(exe, batch_acc, batch_size_tensor,
                                     inference_program)
        if pr is not None:
            # Stop and report the profile; previously it was enabled but
            # never disabled or printed.
            import pstats
            pr.disable()
            pstats.Stats(pr).sort_stats('cumulative').print_stats()
        # NOTE(review): exits after the first pass, so --pass_num > 1 has no
        # effect -- confirm this is intended.
        exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
def print_arguments(args):
    """Normalize ``use_nvprof`` and print every argument, sorted by name.

    ``args.use_nvprof`` is overwritten in place: it stays truthy only when
    the selected device is ``'GPU'``.

    Args:
        args: argparse.Namespace holding at least ``use_nvprof`` and
            ``device``.
    """
    args.use_nvprof = (args.use_nvprof and args.device == 'GPU')
    print('----------- mnist Configuration Arguments -----------')
    # dict.items() works on Python 2 and 3; iteritems() is Python 2 only.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # ``args`` stays a module-level name because eval_test reads it.
    args = parse_args()
    print_arguments(args)
    profile_on_gpu = args.use_nvprof and args.device == 'GPU'
    if not profile_on_gpu:
        run_benchmark(cnn_model, args)
    else:
        # Run the whole benchmark under the CUDA profiler, writing CSV output.
        with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
            run_benchmark(cnn_model, args)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,105 @@
|
|||||||
|
#!/bin/bash
# Benchmark PaddlePaddle Fluid models on a single thread and a single GPU.
# Each model's output is appended to its own <model>_gpu_*.log file, while
# nvidia-smi records GPU memory usage to mem.log once per second.

#export FLAGS_fraction_of_gpu_memory_to_use=0.0
export CUDNN_PATH=/paddle/cudnn_v5

# Disable OpenMP and MKL parallelism, see
# https://github.com/PaddlePaddle/Paddle/issues/7199
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
# Threads per core as reported by lscpu; "1" means hyper-threading is off.
ht=$(lscpu | grep "per core" | awk -F':' '{print $2}' | xargs)
# Quoted string comparison: an empty $ht (lscpu missing or unparsable) takes
# the "HT is ON" branch as before, but no longer raises a test syntax error.
if [ "${ht}" = "1" ]; then # HT is OFF
  if [ -z "$KMP_AFFINITY" ]; then
    export KMP_AFFINITY="granularity=fine,compact,0,0"
  fi
  if [ -z "$OMP_DYNAMIC" ]; then
    export OMP_DYNAMIC="FALSE"
  fi
else # HT is ON
  if [ -z "$KMP_AFFINITY" ]; then
    export KMP_AFFINITY="granularity=fine,compact,1,0"
  fi
fi
# Disable multi-GPU if there is more than one device.
export CUDA_VISIBLE_DEVICES=0
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CUDNN_PATH:$LD_LIBRARY_PATH

# Only query the GPU in use.
nohup stdbuf -oL nvidia-smi \
      --id="${CUDA_VISIBLE_DEVICES}" \
      --query-gpu=timestamp \
      --query-compute-apps=pid,process_name,used_memory \
      --format=csv \
      --filename=mem.log \
      -l 1 &
# Stop the background memory logger when this script exits; it used to keep
# running (and growing mem.log) after the benchmarks had finished.
monitor_pid=$!
trap 'kill "${monitor_pid}" 2>/dev/null' EXIT

# mnist
# mnist gpu mnist 128
FLAGS_benchmark=true stdbuf -oL python fluid/mnist.py \
               --device=GPU \
               --batch_size=128 \
               --skip_batch_num=5 \
               --iterations=500 \
               2>&1 | tee -a mnist_gpu_128.log

# vgg16
# gpu cifar10 128
FLAGS_benchmark=true stdbuf -oL python fluid/vgg16.py \
               --device=GPU \
               --batch_size=128 \
               --skip_batch_num=5 \
               --iterations=30 \
               2>&1 | tee -a vgg16_gpu_128.log

# flowers gpu 32
FLAGS_benchmark=true stdbuf -oL python fluid/vgg16.py \
               --device=GPU \
               --batch_size=32 \
               --data_set=flowers \
               --skip_batch_num=5 \
               --iterations=30 \
               2>&1 | tee -a vgg16_gpu_flowers_32.log

# resnet50
# resnet50 gpu cifar10 128
FLAGS_benchmark=true stdbuf -oL python fluid/resnet50.py \
               --device=GPU \
               --batch_size=128 \
               --data_set=cifar10 \
               --model=resnet_cifar10 \
               --skip_batch_num=5 \
               --iterations=30 \
               2>&1 | tee -a resnet50_gpu_128.log

# resnet50 gpu flowers 64
FLAGS_benchmark=true stdbuf -oL python fluid/resnet50.py \
               --device=GPU \
               --batch_size=64 \
               --data_set=flowers \
               --model=resnet_imagenet \
               --skip_batch_num=5 \
               --iterations=30 \
               2>&1 | tee -a resnet50_gpu_flowers_64.log

# lstm
# lstm gpu imdb 32 # tensorflow only supports batch=32
FLAGS_benchmark=true stdbuf -oL python fluid/stacked_dynamic_lstm.py \
               --device=GPU \
               --batch_size=32 \
               --skip_batch_num=5 \
               --iterations=30 \
               --hidden_dim=512 \
               --emb_dim=512 \
               --crop_size=1500 \
               2>&1 | tee -a lstm_gpu_32.log

# seq2seq
# seq2seq gpu wmt 128
# NOTE(review): this run logs to an lstm_* file name; kept unchanged in case
# other tooling reads it -- confirm whether seq2seq_gpu_128.log was intended.
FLAGS_benchmark=true stdbuf -oL python fluid/machine_translation.py \
               --device=GPU \
               --batch_size=128 \
               --skip_batch_num=5 \
               --iterations=30 \
               2>&1 | tee -a lstm_gpu_128.log
|
@ -0,0 +1,236 @@
|
|||||||
|
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import cPickle
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
|
||||||
|
import numpy
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
import paddle.v2.dataset.imdb as imdb
|
||||||
|
import paddle.fluid as fluid
|
||||||
|
from paddle.v2 import batch
|
||||||
|
import paddle.fluid.profiler as profiler
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Define the stacked dynamic LSTM benchmark command line and parse it.

    The default of ``--crop_size`` is read from the ``CROP_SIZE`` environment
    variable when it is set, otherwise 1500.

    Returns:
        argparse.Namespace with ``batch_size``, ``skip_batch_num``,
        ``iterations``, ``emb_dim``, ``hidden_dim``, ``pass_num``,
        ``device``, ``crop_size`` and ``with_test``.
    """
    # The first positional parameter of ArgumentParser is ``prog``. Passing
    # the banner as ``description`` keeps the real script name in the usage
    # line.
    parser = argparse.ArgumentParser(
        description="Understand Sentiment by Dynamic RNN.")
    parser.add_argument(
        '--batch_size',
        type=int,
        default=32,
        help='The sequence number of a batch data. (default: %(default)d)')
    parser.add_argument(
        '--skip_batch_num',
        type=int,
        default=5,
        help='The first num of minibatch num to skip, for better performance test'
    )
    parser.add_argument(
        '--iterations', type=int, default=80, help='The number of minibatches.')
    parser.add_argument(
        '--emb_dim',
        type=int,
        default=512,
        help='Dimension of embedding table. (default: %(default)d)')
    parser.add_argument(
        '--hidden_dim',
        type=int,
        default=512,
        help='Hidden size of lstm unit. (default: %(default)d)')
    parser.add_argument(
        '--pass_num',
        type=int,
        default=100,
        help='Epoch number to train. (default: %(default)d)')
    parser.add_argument(
        '--device',
        type=str,
        default='CPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    parser.add_argument(
        '--crop_size',
        type=int,
        default=int(os.environ.get('CROP_SIZE', '1500')),
        help='The max sentence length of input. Since this model uses a plain'
        ' RNN, gradients could explode if a sentence is too long')
    parser.add_argument(
        '--with_test',
        action='store_true',
        help='If set, test the testset during training.')
    args = parser.parse_args()
    return args
|
||||||
|
|
||||||
|
|
||||||
|
# IMDB vocabulary, loaded once at import time; used below for the '<unk>'
# lookup, the embedding table size and the training reader.
word_dict = imdb.word_dict()
|
||||||
|
|
||||||
|
|
||||||
|
def crop_sentence(reader, crop_size):
    """Wrap ``reader`` so that only sufficiently short samples are yielded.

    A sample passes when the number of tokens in ``sample[0]`` that differ
    from the ``'<unk>'`` id is strictly below ``crop_size``.

    Args:
        reader: zero-argument callable returning an iterable of samples.
        crop_size: exclusive upper bound on the non-``<unk>`` token count.

    Returns:
        A zero-argument generator function over the retained samples.
    """
    unknown_id = word_dict['<unk>']

    def _filtered_reader():
        for sample in reader():
            known_tokens = sum(1 for token in sample[0] if token != unknown_id)
            if known_tokens < crop_size:
                yield sample

    return _filtered_reader
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Build a hand-written LSTM with DynamicRNN, train it on IMDB and print
    the measured throughput.

    Parses the command line itself. The process exits with status 0 at the
    end of the first pass.
    """
    args = parse_args()
    lstm_size = args.hidden_dim

    data = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    sentence = fluid.layers.embedding(
        input=data, size=[len(word_dict), args.emb_dim])

    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            # Sum of an FC over the step input (with bias) and an FC over
            # the previous hidden state (without bias).
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            gate = fluid.layers.sums(input=[gate0, gate1])
            return gate

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(
                x=forget_gate, y=prev_cell),
            fluid.layers.elementwise_mul(
                x=input_gate, y=cell_gate)
        ])

        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    # Declare the label input once and share it between the loss and the
    # accuracy op, instead of declaring the same named layer twice.
    label_var = fluid.layers.data(name='label', shape=[1], dtype='int64')
    loss = fluid.layers.cross_entropy(input=logit, label=label_var)
    loss = fluid.layers.mean(x=loss)

    # add acc
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit, label=label_var, total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()
    adam.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    train_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.train(word_dict), args.crop_size),
            buf_size=25000),
        batch_size=args.batch_size)

    iters, num_samples, start_time = 0, 0, time.time()
    for pass_id in range(args.pass_num):
        for batch_id, data in enumerate(train_reader()):
            if iters == args.skip_batch_num:
                # Warm-up finished: restart the clock and the counter.
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            tensor_words = to_lodtensor([x[0] for x in data], place)
            label = numpy.array([x[1] for x in data]).astype("int64")
            label = label.reshape((-1, 1))
            loss_np, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={"words": tensor_words,
                      "label": label},
                fetch_list=[loss, batch_acc, batch_size_tensor])
            iters += 1
            # NOTE(review): this adds the token count of every sentence, so
            # the "examples" reported below are words, not sentences --
            # confirm this is intended.
            for x in data:
                num_samples += len(x[0])
            print(
                "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                (pass_id, iters, loss_np, acc)
            )  # The accuracy is the accumulation of batches, but not the current batch.

        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed
        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
              (num_samples, train_elapsed, examples_per_sec))
        # NOTE(review): exits after the first pass, so --pass_num > 1 has no
        # effect -- confirm this is intended.
        exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
def to_lodtensor(data, place):
    """Pack a list of sequences into a single ``fluid.LoDTensor``.

    The sequences are concatenated along axis 0, cast to int64 and reshaped
    into a column; the level-of-detail table holds the cumulative sequence
    lengths, starting at 0.

    Args:
        data: list of sequences accepted by ``numpy.concatenate``.
        place: device place passed to ``LoDTensor.set``.

    Returns:
        The populated ``fluid.LoDTensor``.
    """
    offsets = [0]
    for seq in data:
        offsets.append(offsets[-1] + len(seq))

    flat = numpy.concatenate(data, axis=0).astype("int64")
    flat = flat.reshape([len(flat), 1])

    tensor = fluid.LoDTensor()
    tensor.set(flat, place)
    tensor.set_lod([offsets])
    return tensor
|
||||||
|
|
||||||
|
|
||||||
|
def print_arguments(args):
    """Print every parsed argument as ``name: value``, sorted by name.

    Args:
        args: argparse.Namespace to dump.
    """
    print('----------- lstm Configuration Arguments -----------')
    # dict.items() works on Python 2 and 3; iteritems() is Python 2 only.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: print the parsed configuration, then run the benchmark.
# main() parses the command line again on its own.
if __name__ == '__main__':
|
||||||
|
args = parse_args()
|
||||||
|
print_arguments(args)
|
||||||
|
main()
|
@ -0,0 +1,224 @@
|
|||||||
|
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""VGG16 benchmark in Fluid"""
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import numpy as np
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
import paddle.fluid as fluid
|
||||||
|
import paddle.fluid.core as core
|
||||||
|
import argparse
|
||||||
|
import functools
|
||||||
|
|
||||||
|
# Command-line interface. Parsing happens at import time and the result is
# kept in the module-level ``args`` that main() and print_arguments() read.
parser = argparse.ArgumentParser(description=__doc__)

# Workload size.
parser.add_argument(
    '--batch_size',
    type=int,
    default=128,
    help="Batch size for training.")
parser.add_argument(
    '--skip_batch_num',
    type=int,
    default=5,
    help='The first num of minibatch num to skip, for better performance test')
parser.add_argument(
    '--iterations',
    type=int,
    default=80,
    help='The number of minibatches.')

# Optimization.
parser.add_argument(
    '--learning_rate',
    type=float,
    default=1e-3,
    help="Learning rate for training.")
parser.add_argument(
    '--pass_num',
    type=int,
    default=50,
    help="No. of passes.")

# Device, layout and dataset selection.
parser.add_argument(
    '--device',
    type=str,
    default='GPU',
    choices=['CPU', 'GPU'],
    help="The device type.")
parser.add_argument(
    '--data_format',
    type=str,
    default='NCHW',
    choices=['NCHW', 'NHWC'],
    help='The data order, now only support NCHW.')
parser.add_argument(
    '--data_set',
    type=str,
    default='cifar10',
    choices=['cifar10', 'flowers'],
    help='Optional dataset for benchmark.')

# Evaluation switch.
parser.add_argument(
    '--with_test',
    action='store_true',
    help='If set, test the testset during training.')

args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def vgg16_bn_drop(input):
    """Build the VGG16 trunk with batch norm and dropout.

    Five ``img_conv_group`` stages are followed by dropout, a 512-unit FC,
    batch norm with relu, dropout and a second 512-unit FC.

    Args:
        input: the image layer.

    Returns:
        The output of the second fully connected layer.
    """

    def conv_block(input, num_filter, groups, dropouts):
        # ``groups`` 3x3 relu convolutions with batch norm and per-conv
        # dropout rates, closed by a 2x2 max pool with stride 2.
        return fluid.nets.img_conv_group(
            input=input,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max')

    # (num_filter, groups, dropouts) of each stage, in order.
    stage_specs = [
        (64, 2, [0.3, 0]),
        (128, 2, [0.4, 0]),
        (256, 3, [0.4, 0.4, 0]),
        (512, 3, [0.4, 0.4, 0]),
        (512, 3, [0.4, 0.4, 0]),
    ]
    features = input
    for num_filter, groups, dropouts in stage_specs:
        features = conv_block(features, num_filter, groups, dropouts)

    dropped = fluid.layers.dropout(x=features, dropout_prob=0.5)
    hidden = fluid.layers.fc(input=dropped, size=512, act=None)
    normalized = fluid.layers.batch_norm(input=hidden, act='relu')
    dropped_again = fluid.layers.dropout(x=normalized, dropout_prob=0.5)
    return fluid.layers.fc(input=dropped_again, size=512, act=None)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Build the VGG16 training program, train it and print throughput.

    Reads the module-level ``args``. The dataset (cifar10 or flowers) and
    the data format select the class count and the input shape. The process
    exits with status 0 at the end of the first pass.
    """
    if args.data_set == "cifar10":
        classdim = 10
        if args.data_format == 'NCHW':
            data_shape = [3, 32, 32]
        else:
            data_shape = [32, 32, 3]
    else:
        classdim = 102
        if args.data_format == 'NCHW':
            data_shape = [3, 224, 224]
        else:
            data_shape = [224, 224, 3]

    # Input data
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    net = vgg16_bn_drop(images)
    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # inference program: cloned before the optimizer ops are appended
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimization
    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # Initialize executor
    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    exe = fluid.Executor(place)

    # Parameter initialization
    exe.run(fluid.default_startup_program())

    # data reader
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.train10()
            if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
            buf_size=5120),
        batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.cifar.test10()
        if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
        batch_size=args.batch_size)

    # test
    def test(exe):
        # Weighted average accuracy of the inference program on the test set.
        test_accuracy = fluid.average.WeightedAverage()
        for batch_id, data in enumerate(test_reader()):
            img_data = np.array(
                [x[0].reshape(data_shape) for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = y_data.reshape([-1, 1])

            acc, weight = exe.run(inference_program,
                                  feed={"pixel": img_data,
                                        "label": y_data},
                                  fetch_list=[batch_acc, batch_size_tensor])
            test_accuracy.add(value=acc, weight=weight)
        return test_accuracy.eval()

    iters, num_samples, start_time = 0, 0, time.time()
    accuracy = fluid.average.WeightedAverage()
    for pass_id in range(args.pass_num):
        accuracy.reset()
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_reader()):
            if iters == args.skip_batch_num:
                # Warm-up finished: restart the clock and the sample counter.
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            # List comprehensions instead of map(): on Python 3
            # np.array(map(...)) wraps the iterator instead of stacking.
            img_data = np.array(
                [x[0].reshape(data_shape) for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = y_data.reshape([-1, 1])

            loss, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={"pixel": img_data,
                      "label": y_data},
                fetch_list=[avg_cost, batch_acc, batch_size_tensor])
            accuracy.add(value=acc, weight=weight)
            iters += 1
            num_samples += len(y_data)
            # Accumulate per batch so the pass summary below is a mean over
            # every batch that ran, not only the last one.
            train_losses.append(loss)
            train_accs.append(acc)
            print(
                "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                (pass_id, iters, loss, acc)
            )  # The accuracy is the accumulation of batches, but not the current batch.

        print("Pass: %d, Loss: %f, Train Accuray: %f\n" %
              (pass_id, np.mean(train_losses), np.mean(train_accs)))
        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed
        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
              (num_samples, train_elapsed, examples_per_sec))
        # evaluation
        if args.with_test:
            # NOTE(review): the test accuracy is computed but never reported.
            pass_test_acc = test(exe)
        # NOTE(review): exits after the first pass, so --pass_num > 1 has no
        # effect -- confirm this is intended.
        exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
def print_arguments():
    """Print every field of the module-level ``args`` as ``name: value``,
    sorted by name."""
    print('----------- vgg Configuration Arguments -----------')
    # dict.items() works on Python 2 and 3; iteritems() is Python 2 only.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: print the configuration parsed at import time, then
# run the benchmark.
if __name__ == "__main__":
|
||||||
|
print_arguments()
|
||||||
|
main()
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,180 @@
|
|||||||
|
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import tensorflow as tf
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
|
||||||
|
DTYPE = tf.float32
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the command-line options of the TensorFlow MNIST benchmark.

    Returns:
        argparse.Namespace with the attributes ``batch_size``, ``iterations``,
        ``pass_num`` (all int) and ``device`` (``'CPU'`` or ``'GPU'``).
    """
    cli = argparse.ArgumentParser("mnist model benchmark.")

    # Integer options, registered in the order they appear in --help:
    # (flag, default value, help text).
    int_options = (
        ('--batch_size', 128, 'The minibatch size.'),
        ('--iterations', 35, 'The number of minibatches.'),
        ('--pass_num', 5, 'The number of passes.'),
    )
    for flag, default_value, description in int_options:
        cli.add_argument(
            flag, type=int, default=default_value, help=description)

    cli.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')

    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def run_benchmark(args):
    """Train and evaluate a small convolutional MNIST classifier, timing it.

    The graph has two conv/relu/max-pool stages followed by one fully
    connected layer and a softmax. After every pass over the training data
    the accumulated training accuracy, the test accuracy and the wall-clock
    time of the pass are printed.

    Args:
        args: Parsed options. This function reads ``args.device``,
            ``args.batch_size`` and ``args.pass_num``.
            NOTE(review): ``args.iterations`` is parsed by ``parse_args`` but
            is not read anywhere in this function.
    """

    def weight_variable(dtype, shape):
        """Create a variable initialised from a truncated normal (stddev 0.1)."""
        initial = tf.truncated_normal(shape, stddev=0.1, dtype=dtype)
        return tf.Variable(initial)

    def bias_variable(dtype, shape):
        """Create a variable initialised to the constant 0.1."""
        initial = tf.constant(0.1, shape=shape, dtype=dtype)
        return tf.Variable(initial)

    device = '/cpu:0' if args.device == 'CPU' else '/device:GPU:0'
    with tf.device(device):
        images = tf.placeholder(DTYPE, shape=(None, 28, 28, 1))
        labels = tf.placeholder(tf.int64, shape=(None, ))

        # conv1, relu, pool1
        conv1_weights = weight_variable(DTYPE, [5, 5, 1, 20])
        conv1_bias = bias_variable(DTYPE, [20])
        conv1 = tf.nn.conv2d(
            images, conv1_weights, strides=[1, 1, 1, 1], padding="VALID")
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))
        pool1 = tf.nn.max_pool(
            relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")

        # conv2, relu, pool2
        conv2_weights = weight_variable(DTYPE, [5, 5, 20, 50])
        conv2_bias = bias_variable(DTYPE, [50])
        conv2 = tf.nn.conv2d(
            pool1, conv2_weights, strides=[1, 1, 1, 1], padding="VALID")
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))
        pool2 = tf.nn.max_pool(
            relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")

        # FC: flatten every non-batch dimension of pool2 into one axis.
        pool_shape = pool2.get_shape().as_list()
        # Plain loop instead of the builtin reduce(), which does not exist
        # on Python 3.
        hidden_dim = 1
        for dim in pool_shape[1:]:
            hidden_dim *= dim
        reshape = tf.reshape(pool2, shape=(tf.shape(pool2)[0], hidden_dim))
        fc_weights = weight_variable(DTYPE, [hidden_dim, 10])
        fc_bias = bias_variable(DTYPE, [10])
        logits = tf.matmul(reshape, fc_weights) + fc_bias

        # Get prediction
        prediction = tf.nn.softmax(logits)

        # Loss: per-example cross entropy, then the batch mean.
        one_hot_labels = tf.one_hot(labels, depth=10)
        cost = -tf.reduce_sum(tf.log(prediction) * one_hot_labels, [1])
        avg_cost = tf.reduce_mean(cost)

        # Get accuracy of the current batch.
        correct = tf.equal(tf.argmax(prediction, 1), labels)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        # Streaming accuracy. Its local variables live in a dedicated scope
        # so they can be re-initialised (reset) on their own.
        with tf.variable_scope("reset_metrics_accuracy_scope") as scope:
            g_accuracy = tf.metrics.accuracy(
                labels, tf.argmax(
                    prediction, axis=1))
            # Named metric_vars so the builtin vars() is not shadowed.
            metric_vars = tf.contrib.framework.get_variables(
                scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
            g_accuracy_reset_op = tf.variables_initializer(metric_vars)

        # Optimizer
        opt = tf.train.AdamOptimizer(
            learning_rate=0.001, beta1=0.9, beta2=0.999)
        train_op = opt.minimize(avg_cost)

    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=args.batch_size)

    def make_feed(batch):
        """Turn one reader batch into the feed dict for the placeholders.

        Each sample is indexed as ``sample[0]`` (pixels, reshaped to
        1x28x28 and transposed to 28x28x1) and ``sample[1]`` (label).
        List comprehensions are used because ``map()`` returns an iterator
        on Python 3, which ``np.array`` would wrap as a 0-d object array.
        """
        images_data = np.array([
            np.transpose(sample[0].reshape([1, 28, 28]), axes=[1, 2, 0])
            for sample in batch
        ]).astype("float32")
        labels_data = np.array([sample[1] for sample in batch]).astype("int64")
        return {images: images_data, labels: labels_data}

    def eval_test():
        """Run the whole test set and return the accumulated accuracy."""
        sess.run(g_accuracy_reset_op)
        for batch_id, data in enumerate(test_reader()):
            loss, acc, g_acc = sess.run(
                [avg_cost, accuracy, g_accuracy], feed_dict=make_feed(data))
        return g_acc[1]

    config = tf.ConfigProto(
        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        init_g = tf.global_variables_initializer()
        init_l = tf.local_variables_initializer()
        sess.run(init_g)
        sess.run(init_l)
        for pass_id in range(args.pass_num):
            sess.run(g_accuracy_reset_op)

            pass_start = time.time()
            for batch_id, data in enumerate(train_reader()):
                # Build the feed before starting the clock so only the
                # session run is timed.
                feed = make_feed(data)

                start = time.time()
                _, loss, acc, g_acc = sess.run(
                    [train_op, avg_cost, accuracy, g_accuracy],
                    feed_dict=feed)
                end = time.time()

                # time.time() already returns seconds; the former "/ 1000"
                # under-reported the elapsed time by three orders of
                # magnitude.
                print("pass=%d, batch=%d, loss=%f, error=%f, elapse=%f" %
                      (pass_id, batch_id, loss, 1 - acc, end - start))

            pass_end = time.time()
            test_avg_acc = eval_test()

            print(
                "pass=%d, training_avg_accuracy=%f, test_avg_acc=%f, elapse=%f"
                % (pass_id, g_acc[1], test_avg_acc, pass_end - pass_start))
|
||||||
|
|
||||||
|
|
||||||
|
def print_arguments(args):
    """Print the parsed command-line options, one ``name: value`` per line.

    Args:
        args: Namespace-like object; its attributes (as returned by
            ``vars(args)``) are listed in sorted order between two banners.
    """
    print('----------- Configuration Arguments -----------')
    # dict.items() exists on both Python 2 and Python 3, whereas
    # dict.iteritems() raises AttributeError on Python 3.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: parse the options, echo them, then run the benchmark.
if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)
    run_benchmark(args)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,220 @@
|
|||||||
|
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the command-line options of the TensorFlow LSTM benchmark.

    Returns:
        argparse.Namespace with ``batch_size``, ``stacked_num``,
        ``embedding_dim``, ``hidden_dim``, ``pass_num`` (int),
        ``learning_rate`` (float) and ``infer_only`` (bool flag).
    """
    cli = argparse.ArgumentParser("LSTM model benchmark.")

    # Integer options, registered in the order they appear in --help:
    # (flag, default value, help text).
    int_options = (
        ('--batch_size', 32,
         'The sequence number of a batch data. (default: %(default)d)'),
        ('--stacked_num', 5,
         'Number of lstm layers to stack. (default: %(default)d)'),
        ('--embedding_dim', 512,
         'Dimension of embedding table. (default: %(default)d)'),
        ('--hidden_dim', 512,
         'Hidden size of lstm unit. (default: %(default)d)'),
        ('--pass_num', 10,
         'Epoch number to train. (default: %(default)d)'),
    )
    for flag, default_value, description in int_options:
        cli.add_argument(
            flag, type=int, default=default_value, help=description)

    cli.add_argument(
        '--learning_rate',
        type=float,
        default=0.0002,
        help='Learning rate used to train. (default: %(default)f)')
    cli.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')

    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def print_arguments(args):
    """Print the parsed command-line options, one ``name: value`` per line.

    Args:
        args: Namespace-like object; its attributes (as returned by
            ``vars(args)``) are listed in sorted order between two banners.
    """
    print('----------- Configuration Arguments -----------')
    # dict.items() exists on both Python 2 and Python 3, whereas
    # dict.iteritems() raises AttributeError on Python 3.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
|
||||||
|
|
||||||
|
|
||||||
|
def dynamic_lstm_model(dict_size,
                       embedding_dim,
                       hidden_dim,
                       stacked_num,
                       class_num=2,
                       is_train=True):
    """Build a stacked-LSTM sequence classifier graph.

    Word ids are embedded, fed through ``stacked_num`` LSTM layers with
    ``tf.nn.dynamic_rnn``, and the hidden state of the last layer is
    projected to ``class_num`` logits.

    Args:
        dict_size: Number of rows of the embedding table.
        embedding_dim: Width of each embedding vector.
        hidden_dim: Number of units of every LSTM layer.
        stacked_num: Number of LSTM layers to stack.
        class_num: Number of output classes.
        is_train: When False, only the inference graph is built.

    Returns:
        If ``is_train`` is False:
            ``((word_idx, sequence_length), softmax_prediction)``.
        Otherwise:
            ``((word_idx, sequence_length, label), avg_loss, acc, g_acc,
            reset_op)``, where ``g_acc`` is the tuple returned by
            ``tf.metrics.accuracy`` and ``reset_op`` re-initialises the local
            variables of that metric.
    """
    word_idx = tf.placeholder(tf.int64, shape=[None, None])
    sequence_length = tf.placeholder(tf.int64, shape=[None, ])

    embedding_weights = tf.get_variable('word_embeddings',
                                        [dict_size, embedding_dim])
    embedding = tf.nn.embedding_lookup(embedding_weights, word_idx)

    # Every layer needs its own cell object. "[cell] * n" would put the very
    # same instance (and therefore the same weights) into every layer, which
    # newer TensorFlow 1.x releases reject.
    lstm_cells = [
        tf.nn.rnn_cell.LSTMCell(
            num_units=hidden_dim, use_peepholes=False)
        for _ in range(stacked_num)
    ]
    stacked_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)

    # final_state [LSTMTuple(c, h), LSTMTuple(c, h) ...] total stacked_num LSTMTuples
    _, final_state = tf.nn.dynamic_rnn(
        cell=stacked_cell,
        inputs=embedding,
        dtype=tf.float32,
        sequence_length=sequence_length)

    w = tf.Variable(
        tf.truncated_normal([hidden_dim, class_num]), dtype=tf.float32)
    bias = tf.Variable(
        tf.constant(
            value=0.0, shape=[class_num], dtype=tf.float32))
    # final_state[-1][1] is the h component of the top layer's state tuple.
    prediction = tf.matmul(final_state[-1][1], w) + bias

    if not is_train:
        return (word_idx, sequence_length), tf.nn.softmax(prediction)

    label = tf.placeholder(tf.int64, shape=[None, ])
    # The one-hot depth must match the width of the logits (class_num); it
    # used to be hard-coded to 2, which only matched the default class_num.
    loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(label, class_num), logits=prediction)
    avg_loss = tf.reduce_mean(loss)

    correct_count = tf.equal(tf.argmax(prediction, 1), label)
    acc = tf.reduce_mean(tf.cast(correct_count, tf.float32))

    # Streaming accuracy. Its local variables live in a dedicated scope so
    # they can be re-initialised (reset) on their own.
    with tf.variable_scope("reset_metrics_accuracy_scope") as scope:
        g_acc = tf.metrics.accuracy(label, tf.argmax(prediction, axis=1))
        # Named metric_vars so the builtin vars() is not shadowed.
        metric_vars = tf.contrib.framework.get_variables(
            scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
        reset_op = tf.variables_initializer(metric_vars)

    return (word_idx, sequence_length, label), avg_loss, acc, g_acc, reset_op
|
||||||
|
|
||||||
|
|
||||||
|
def padding_data(data, padding_size, value):
    """Return ``data`` padded with ``value`` and cut to ``padding_size`` items.

    Args:
        data: List to pad; it is not modified.
        padding_size: Length of the slice taken from the padded list.
        value: Filler element appended after the original items.

    Returns:
        A new list: the first ``padding_size`` elements of ``data`` followed
        by ``padding_size`` copies of ``value``.
    """
    filler = [value] * padding_size
    return (data + filler)[:padding_size]
|
||||||
|
|
||||||
|
|
||||||
|
def train(args):
    """Train the stacked-LSTM classifier on IMDB and report speed and accuracy.

    For every pass the function prints the per-batch loss and accuracy, then
    evaluates on the test reader and prints the test accuracy, the number of
    words processed per second and the duration of the pass.

    Args:
        args: Parsed options. This function reads ``embedding_dim``,
            ``hidden_dim``, ``stacked_num``, ``learning_rate``,
            ``batch_size`` and ``pass_num``.
    """
    word_dict = paddle.dataset.imdb.word_dict()
    dict_size = len(word_dict)

    feeding_list, avg_loss, acc, g_acc, reset_op = dynamic_lstm_model(
        dict_size, args.embedding_dim, args.hidden_dim, args.stacked_num)

    adam_optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    train_op = adam_optimizer.minimize(avg_loss)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=25000),
        batch_size=args.batch_size)

    test_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.test(word_dict), buf_size=25000),
        batch_size=args.batch_size)

    def build_feed(batch):
        """Pad one reader batch and map it onto the model placeholders.

        Each sample is indexed as ``sample[0]`` (word id sequence) and
        ``sample[1]`` (label). Lists are built explicitly because the
        sequences are traversed twice, and a Python 3 ``map()`` iterator
        would be exhausted after the first traversal.

        Returns:
            ``(feed_dict, lengths)`` where ``lengths`` holds the unpadded
            length of every sequence in the batch.
        """
        sequences = [sample[0] for sample in batch]
        lengths = np.array([len(seq) for seq in sequences]).astype('int64')
        maxlen = int(np.max(lengths))
        padded = np.array(
            [padding_data(seq, maxlen, 0) for seq in sequences]).astype('int64')
        label = np.array([sample[1] for sample in batch]).astype('int64')
        feed = {
            feeding_list[0]: padded,
            feeding_list[1]: lengths,
            feeding_list[2]: label
        }
        return feed, lengths

    def do_validation(sess):
        """Return the accuracy accumulated over the whole test reader."""
        sess.run(reset_op)
        for batch_id, data in enumerate(test_reader()):
            feed, _ = build_feed(data)
            # Only the metric is evaluated here. Running train_op, as this
            # loop used to do, would update the weights on the test set.
            fetch_g_acc = sess.run(g_acc, feed_dict=feed)

        return fetch_g_acc[1]

    config = tf.ConfigProto(
        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        init_g = tf.global_variables_initializer()
        init_l = tf.local_variables_initializer()
        sess.run(init_l)
        sess.run(init_g)

        # range() works on Python 2 and 3; xrange() is Python 2 only.
        for pass_id in range(args.pass_num):
            # clear accuracy local variable
            sess.run(reset_op)
            pass_start_time = time.time()
            words_seen = 0

            for batch_id, data in enumerate(train_reader()):
                feed, lengths = build_feed(data)
                words_seen += np.sum(lengths)

                _, loss, fetch_acc, fetch_g_acc = sess.run(
                    [train_op, avg_loss, acc, g_acc], feed_dict=feed)

                print("pass_id=%d, batch_id=%d, loss: %f, acc: %f, avg_acc: %f"
                      % (pass_id, batch_id, loss, fetch_acc, fetch_g_acc[1]))

            pass_end_time = time.time()
            time_consumed = pass_end_time - pass_start_time
            words_per_sec = words_seen / time_consumed
            test_acc = do_validation(sess)
            print("pass_id=%d, test_acc: %f, words/s: %f, sec/pass: %f" %
                  (pass_id, test_acc, words_per_sec, time_consumed))
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: parse the options, echo them, then train.
if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)

    # With --infer_only nothing is executed; otherwise the model is trained.
    if not args.infer_only:
        train(args)
|
File diff suppressed because it is too large
Load Diff
@ -1,67 +0,0 @@
|
|||||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# NCCL is only fetched/built for GPU builds; stop processing this file otherwise.
if(NOT WITH_GPU)
  return()
endif()

include(ExternalProject)

set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)

# Expose the nccl headers from the checked-out source tree.
include_directories(${NCCL_SOURCE_DIR}/src/extern_nccl/src)

if(WITH_DSO)
  # If we use DSO, we do not build nccl, just download the dependencies.
  # Empty commands disable the corresponding ExternalProject steps.
  set(NCCL_BUILD_COMMAND "")
  set(NCCL_INSTALL_COMMAND "")
  set(NCCL_INSTALL_DIR "")
else()
  # Otherwise, we build nccl and link it.
  set(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
  # Note: cuda 8.0 is needed to make nccl.
  # When cuda is not installed on the system directory, need to set CUDA_HOME
  # to your cuda root.
  #
  # The commands are stored as CMake lists, one element per argv entry.
  # A single string such as "make -j 8" would be handed to the build tool as
  # ONE argument, i.e. as the name of a program called "make -j 8".
  set(NCCL_BUILD_COMMAND make -j 8)
  set(NCCL_INSTALL_COMMAND make install PREFIX=${NCCL_INSTALL_DIR})
endif()

ExternalProject_Add(
  extern_nccl
  ${EXTERNAL_PROJECT_LOG_ARGS}
  GIT_REPOSITORY "https://github.com/NVIDIA/nccl.git"
  GIT_TAG "v1.3.4-1"
  PREFIX "${NCCL_SOURCE_DIR}"
  UPDATE_COMMAND ""
  CONFIGURE_COMMAND ""
  # There is no configure step generating a build tree, so "make" has to run
  # inside the checked-out sources where the Makefile lives.
  BUILD_IN_SOURCE 1
  # Quoted on purpose: an empty value must still reach ExternalProject_Add as
  # "" to disable the step, while a list value is split into separate
  # arguments again when ExternalProject_Add parses its argument list.
  BUILD_COMMAND "${NCCL_BUILD_COMMAND}"
  INSTALL_COMMAND "${NCCL_INSTALL_COMMAND}"
  INSTALL_DIR "${NCCL_INSTALL_DIR}"
  TEST_COMMAND ""
)

if(WITH_DSO)
  # Nothing is linked in DSO mode; the nccl target only exists so that other
  # targets can depend on the download step.
  if(CMAKE_VERSION VERSION_LESS "3.3.0")
    # For CMake older than 3.3, build a static library from a generated
    # dummy source file instead of using an INTERFACE library.
    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_nccl_dummy.c)
    file(WRITE ${dummyfile} "const char * dummy_nccl = \"${dummyfile}\";")
    add_library(nccl STATIC ${dummyfile})
  else()
    add_library(nccl INTERFACE)
  endif()
else()
  # Link against the static library produced by the install step.
  add_library(nccl STATIC IMPORTED GLOBAL)
  set_property(TARGET nccl PROPERTY IMPORTED_LOCATION
               ${NCCL_INSTALL_DIR}/lib/libnccl_static.a)
endif()

# Make sure the sources are fetched (and built, if enabled) before nccl is used.
add_dependencies(nccl extern_nccl)
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue