commit
736950603e
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,205 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import argparse
|
||||
import time
|
||||
|
||||
import paddle.v2 as paddle
|
||||
import paddle.fluid as fluid
|
||||
import paddle.fluid.profiler as profiler
|
||||
|
||||
SEED = 1
|
||||
DTYPE = "float32"
|
||||
|
||||
# The random seed must be set before configuring the network.
|
||||
# fluid.default_startup_program().random_seed = SEED
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the MNIST benchmark.

    Returns:
        argparse.Namespace with the attributes batch_size, iterations,
        pass_num, device, infer_only, use_cprof and use_nvprof.
    """
    parser = argparse.ArgumentParser("mnist model benchmark.")

    # Integer options, registered in the same order as they appear in --help.
    int_options = (
        ('--batch_size', 128, 'The minibatch size.'),
        ('--iterations', 35, 'The number of minibatches.'),
        ('--pass_num', 5, 'The number of passes.'),
    )
    for flag, default, help_text in int_options:
        parser.add_argument(flag, type=int, default=default, help=help_text)

    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')

    # Boolean switches: absent means False.
    switches = (
        ('--infer_only', 'If set, run forward only.'),
        ('--use_cprof', 'If set, use cProfile.'),
        ('--use_nvprof', 'If set, use nvprof for CUDA.'),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, action='store_true', help=help_text)

    return parser.parse_args()
|
||||
|
||||
|
||||
def print_arguments(args):
    """Normalize and print the parsed configuration.

    ``use_nvprof`` is cleared unless the selected device is 'GPU'; the
    namespace is updated in place. Every argument is then printed as
    ``name: value``, sorted by name, between two banner lines.

    Args:
        args: namespace that has at least ``use_nvprof`` and ``device``.
    """
    options = vars(args)
    options['use_nvprof'] = (options['use_nvprof'] and
                             options['device'] == 'GPU')
    print('-----------  Configuration Arguments -----------')
    # items() exists on Python 2 and 3; iteritems() is Python 2 only.
    for arg, value in sorted(options.items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
|
||||
|
||||
|
||||
def cnn_model(data):
    """Build the benchmark CNN: two conv+pool stages and a softmax FC layer.

    Args:
        data: input layer fed to the first conv/pool stage.

    Returns:
        The softmax prediction layer with SIZE (10) outputs.
    """
    # `reduce` is a builtin only on Python 2; functools provides it on both.
    from functools import reduce

    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=data,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        act="relu")
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        act="relu")

    # TODO(dzhwinter) : refine the initializer and random seed setting
    SIZE = 10
    input_shape = conv_pool_2.shape
    # Flattened feature count: product of every dimension after the first.
    param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
    # Standard deviation used for the FC weight initializer.
    scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5

    predict = fluid.layers.fc(
        input=conv_pool_2,
        size=SIZE,
        act="softmax",
        param_attr=fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.NormalInitializer(
                loc=0.0, scale=scale)))
    return predict
|
||||
|
||||
|
||||
def eval_test(exe, batch_acc, batch_size_tensor, inference_program):
    """Run the inference program over the MNIST test set.

    Args:
        exe: executor used to run `inference_program`.
        batch_acc: accuracy variable to fetch for each batch.
        batch_size_tensor: variable holding the per-batch sample count; it is
            used as the weight of each batch accuracy.
        inference_program: program to execute.

    Returns:
        The weighted average accuracy over all test batches.

    Note:
        The batch size is read from the module-level ``args`` that the
        ``__main__`` section assigns.
    """
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=args.batch_size)
    test_pass_acc = fluid.average.WeightedAverage()
    for batch_id, data in enumerate(test_reader()):
        # List comprehensions instead of map(): on Python 3 map() is lazy and
        # np.array() would wrap the iterator object instead of the samples.
        img_data = np.array(
            [sample[0].reshape([1, 28, 28]) for sample in data]).astype(DTYPE)
        y_data = np.array([sample[1] for sample in data]).astype("int64")
        y_data = y_data.reshape([len(y_data), 1])

        acc, weight = exe.run(inference_program,
                              feed={"pixel": img_data,
                                    "label": y_data},
                              fetch_list=[batch_acc, batch_size_tensor])
        test_pass_acc.add(value=acc, weight=weight)
    pass_acc = test_pass_acc.eval()
    return pass_acc
|
||||
|
||||
|
||||
def run_benchmark(model, args):
    """Train `model` on MNIST and print per-batch and per-pass statistics.

    Args:
        model: callable that receives the image data layer and returns the
            prediction layer (for example ``cnn_model``).
        args: parsed arguments; this function reads ``use_cprof``, ``device``,
            ``batch_size`` and ``pass_num``.

    NOTE(review): ``args.iterations`` and ``args.infer_only`` are not
    consulted anywhere in this function.
    """
    if args.use_cprof:
        # Imported locally: the module-level imports do not provide cProfile,
        # so --use_cprof previously failed with a NameError.
        import cProfile
        pr = cProfile.Profile()
        pr.enable()

    # Input data
    images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    predict = model(images)
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # inference program (cloned before the optimizer ops are appended)
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimization
    opt = fluid.optimizer.AdamOptimizer(
        learning_rate=0.001, beta1=0.9, beta2=0.999)
    opt.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # Initialize executor
    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    # Parameter initialization
    exe.run(fluid.default_startup_program())

    # Reader
    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=args.batch_size)

    accuracy = fluid.average.WeightedAverage()
    for pass_id in range(args.pass_num):
        accuracy.reset()
        pass_start = time.time()
        for batch_id, data in enumerate(train_reader()):
            # List comprehensions instead of map(): map() is lazy on Python 3.
            img_data = np.array(
                [sample[0].reshape([1, 28, 28])
                 for sample in data]).astype(DTYPE)
            y_data = np.array([sample[1] for sample in data]).astype("int64")
            y_data = y_data.reshape([len(y_data), 1])

            start = time.time()
            outs = exe.run(
                fluid.default_main_program(),
                feed={"pixel": img_data,
                      "label": y_data},
                fetch_list=[avg_cost, batch_acc, batch_size_tensor]
            )  # The accuracy is the accumulation of batches, but not the current batch.
            accuracy.add(value=outs[1], weight=outs[2])
            end = time.time()
            loss = np.array(outs[0])
            acc = np.array(outs[1])
            # time.time() already returns seconds, so the difference is
            # reported as-is (it used to be divided by 1000).
            print("pass=%d, batch=%d, loss=%f, error=%f, elapse=%f" %
                  (pass_id, batch_id, loss, 1 - acc, end - start))

        pass_end = time.time()

        train_avg_acc = accuracy.eval()
        test_avg_acc = eval_test(exe, batch_acc, batch_size_tensor,
                                 inference_program)

        print("pass=%d, train_avg_acc=%f, test_avg_acc=%f, elapse=%f" %
              (pass_id, train_avg_acc, test_avg_acc, pass_end - pass_start))

    if args.use_cprof:
        # Stop the profiler and report what it collected; without this the
        # --use_cprof switch produced no output at all.
        pr.disable()
        pr.print_stats(sort='cumulative')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # `args` is deliberately module-level: eval_test() reads args.batch_size.
    args = parse_args()
    print_arguments(args)

    # print_arguments() has already cleared use_nvprof for non-GPU devices;
    # the device check is repeated so the condition stays exactly the same.
    profile_with_nvprof = args.use_nvprof and args.device == 'GPU'
    if not profile_with_nvprof:
        run_benchmark(cnn_model, args)
    else:
        with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
            run_benchmark(cnn_model, args)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,49 @@
|
||||
#!/bin/bash
# This script benchmarks PaddlePaddle Fluid on
# a single thread and a single GPU.
export CUDNN_PATH=/paddle/cudnn_v5/cuda/lib

# disable openmp and mkl parallel
# https://github.com/PaddlePaddle/Paddle/issues/7199
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
# Threads per core as reported by lscpu; 1 means Hyper-Threading is off.
ht=$(lscpu | grep "per core" | awk -F':' '{print $2}' | xargs)
# Quote and default the value so an empty lscpu result does not turn the
# test into a syntax error; an unknown value takes the "HT is ON" branch,
# which is the branch the unquoted test fell into as well.
if [ "${ht:-0}" -eq 1 ]; then # HT is OFF
    if [ -z "$KMP_AFFINITY" ]; then
        export KMP_AFFINITY="granularity=fine,compact,0,0"
    fi
    if [ -z "$OMP_DYNAMIC" ]; then
        export OMP_DYNAMIC="FALSE"
    fi
else # HT is ON
    if [ -z "$KMP_AFFINITY" ]; then
        export KMP_AFFINITY="granularity=fine,compact,1,0"
    fi
fi
# disable multi-gpu if have more than one
export CUDA_VISIBLE_DEVICES=0
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CUDNN_PATH:$LD_LIBRARY_PATH


# vgg16
# cifar10 gpu cifar10 128
# Redirect stdout to the log first, then point stderr at it; with the order
# reversed ("2>&1 > file") stderr stays on the terminal and misses the log.
FLAGS_benchmark=true python fluid/vgg.py \
       --device=GPU \
       --batch_size=128 \
       --skip_batch_num=5 \
       --iterations=30 \
       > vgg16_gpu_128.log 2>&1

# resnet50
# resnet50 gpu cifar10 128
FLAGS_benchmark=true python fluid/resnet.py \
       --device=GPU \
       --batch_size=128 \
       --data_set=cifar10 \
       --model=resnet_cifar10 \
       --skip_batch_num=5 \
       --iterations=30 \
       > resnet50_gpu_128.log 2>&1

# lstm
|
@ -0,0 +1,209 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import cPickle
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
|
||||
import numpy
|
||||
import paddle.v2 as paddle
|
||||
import paddle.v2.dataset.imdb as imdb
|
||||
import paddle.fluid as fluid
|
||||
from paddle.v2 import batch
|
||||
import paddle.fluid.profiler as profiler
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the dynamic-RNN sentiment benchmark.

    Returns:
        argparse.Namespace with batch_size, emb_dim, hidden_dim, pass_num,
        device and crop_size. The crop_size default comes from the CROP_SIZE
        environment variable and falls back to 1500.
    """
    parser = argparse.ArgumentParser("Understand Sentiment by Dynamic RNN.")
    parser.add_argument(
        '--batch_size',
        type=int,
        default=32,
        help='The sequence number of a batch data. (default: %(default)d)')
    parser.add_argument(
        '--emb_dim',
        type=int,
        default=512,
        help='Dimension of embedding table. (default: %(default)d)')
    parser.add_argument(
        '--hidden_dim',
        type=int,
        default=512,
        help='Hidden size of lstm unit. (default: %(default)d)')
    parser.add_argument(
        '--pass_num',
        type=int,
        default=100,
        help='Epoch number to train. (default: %(default)d)')
    parser.add_argument(
        '--device',
        type=str,
        default='CPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    parser.add_argument(
        '--crop_size',
        type=int,
        default=int(os.environ.get('CROP_SIZE', '1500')),
        # The help text used to say "explored"; gradients "explode".
        help='The max sentence length of input. Since this model uses plain '
        'RNN, gradients could explode if the sentence is too long.')
    args = parser.parse_args()
    return args
|
||||
|
||||
|
||||
# IMDB vocabulary, built once at import time; crop_sentence() looks up the
# '<unk>' id in it and main() uses its size and passes it to imdb.train().
word_dict = imdb.word_dict()
|
||||
|
||||
|
||||
def crop_sentence(reader, crop_size):
    """Wrap `reader` so that it only yields sufficiently short samples.

    A sample is kept when the number of entries in ``item[0]`` that differ
    from the '<unk>' id of the module-level ``word_dict`` is below
    `crop_size`.

    Args:
        reader: zero-argument callable returning an iterable of samples.
        crop_size: exclusive upper bound on the counted length.

    Returns:
        A zero-argument generator function over the retained samples.
    """
    unk_id = word_dict['<unk>']

    def _cropped_reader():
        for sample in reader():
            known_words = sum(1 for token in sample[0] if token != unk_id)
            if known_words < crop_size:
                yield sample

    return _cropped_reader
|
||||
|
||||
|
||||
def main():
    """Build the dynamic-RNN LSTM sentiment model and train it on IMDB.

    Reads the configuration via parse_args(), builds the network, and runs
    ``args.pass_num`` training passes, printing loss and accuracy for each
    batch and time and words/second for each pass.
    """
    args = parse_args()
    lstm_size = args.hidden_dim

    data = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    sentence = fluid.layers.embedding(
        input=data, size=[len(word_dict), args.emb_dim])

    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(
                ipt,
                hidden,
                size, ):
            # Sum of an FC over the step input (with bias) and an FC over
            # the previous hidden state (without bias).
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            gate = fluid.layers.sums(input=[gate0, gate1])
            return gate

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(
                x=forget_gate, y=prev_cell), fluid.layers.elementwise_mul(
                    x=input_gate, y=cell_gate)
        ])

        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')

    # Declare the label input once and share it between the loss and the
    # accuracy op; it used to be declared twice under the same name.
    label_layer = fluid.layers.data(name='label', shape=[1], dtype='int64')
    loss = fluid.layers.cross_entropy(input=logit, label=label_layer)
    loss = fluid.layers.mean(x=loss)

    # add acc
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit, label=label_layer, total=batch_size_tensor)

    # Cloned before the optimizer ops are appended to the main program.
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()
    adam.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    def train_loop(pass_num, crop_size):
        with profiler.profiler(args.device, 'total') as prof:
            for pass_id in range(pass_num):
                train_reader = batch(
                    paddle.reader.shuffle(
                        crop_sentence(imdb.train(word_dict), crop_size),
                        buf_size=25000),
                    batch_size=args.batch_size)
                word_nums = 0
                pass_start_time = time.time()
                for batch_id, data in enumerate(train_reader()):
                    tensor_words = to_lodtensor([x[0] for x in data], place)
                    # Count every word fed in this pass for the words/s figure.
                    for x in data:
                        word_nums += len(x[0])
                    label = numpy.array([x[1] for x in data]).astype("int64")
                    label = label.reshape((-1, 1))
                    loss_np, acc, weight = exe.run(
                        fluid.default_main_program(),
                        feed={"words": tensor_words,
                              "label": label},
                        fetch_list=[loss, batch_acc, batch_size_tensor])
                    print("pass_id=%d, batch_id=%d, loss=%f, acc=%f" %
                          (pass_id, batch_id, loss_np, acc))

                pass_end_time = time.time()
                time_consumed = pass_end_time - pass_start_time
                words_per_sec = word_nums / time_consumed
                print("pass_id=%d, sec/pass: %f, words/s: %f" %
                      (pass_id, time_consumed, words_per_sec))

    train_loop(args.pass_num, args.crop_size)
|
||||
|
||||
|
||||
def to_lodtensor(data, place):
    """Pack a list of variable-length sequences into one fluid.LoDTensor.

    Args:
        data: list of sequences; they are concatenated along axis 0.
        place: device place handed to ``LoDTensor.set``.

    Returns:
        A LoDTensor holding the concatenated values as an int64 column
        (one value per row) with a single LoD level of cumulative offsets.
    """
    # Offsets start at 0; each entry adds the length of the next sequence.
    offsets = [0]
    for seq in data:
        offsets.append(offsets[-1] + len(seq))

    flat = numpy.concatenate(data, axis=0).astype("int64")
    column = flat.reshape([len(flat), 1])

    tensor = fluid.LoDTensor()
    tensor.set(column, place)
    tensor.set_lod([offsets])
    return tensor
|
||||
|
||||
|
||||
# Script entry point: build the model and train it.
if __name__ == "__main__":
    main()
|
@ -0,0 +1,220 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""VGG16 benchmark in Fluid"""
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import time
|
||||
import numpy as np
|
||||
import paddle.v2 as paddle
|
||||
import paddle.fluid as fluid
|
||||
import paddle.fluid.core as core
|
||||
import argparse
|
||||
import functools
|
||||
|
||||
# Command-line options of the VGG16 benchmark. They are parsed at import
# time and published as the module-level `args` read by main() and
# print_arguments().
parser = argparse.ArgumentParser(description=__doc__)

# Training size / length controls.
parser.add_argument(
    '--batch_size',
    type=int,
    default=128,
    help="Batch size for training.")
parser.add_argument(
    '--skip_batch_num',
    type=int,
    default=5,
    help='The first num of minibatch num to skip, for better performance test')
parser.add_argument(
    '--iterations',
    type=int,
    default=80,
    help='The number of minibatches.')
parser.add_argument(
    '--learning_rate',
    type=float,
    default=1e-3,
    help="Learning rate for training.")
parser.add_argument(
    '--pass_num',
    type=int,
    default=50,
    help="No. of passes.")

# Device, data layout and dataset selection.
parser.add_argument(
    '--device',
    type=str,
    default='GPU',
    choices=['CPU', 'GPU'],
    help="The device type.")
parser.add_argument(
    '--data_format',
    type=str,
    default='NCHW',
    choices=['NCHW', 'NHWC'],
    help='The data order, now only support NCHW.')
parser.add_argument(
    '--data_set',
    type=str,
    default='cifar10',
    choices=['cifar10', 'flowers'],
    help='Optional dataset for benchmark.')

# Optional evaluation on the test set after every pass.
parser.add_argument(
    '--with_test',
    action='store_true',
    help='If set, test the testset during training.')

args = parser.parse_args()
|
||||
|
||||
|
||||
def vgg16_bn_drop(input):
    """Build the VGG16 feature extractor with batch norm and dropout.

    Five conv groups are stacked, followed by dropout, a 512-unit FC layer,
    batch norm with relu, dropout and a second 512-unit FC layer.

    Args:
        input: image input layer.

    Returns:
        The output of the second 512-unit FC layer.
    """

    def conv_block(ipt, num_filter, groups, dropouts):
        # One conv group: `groups` 3x3 conv layers with batch norm and
        # per-layer dropout rates, followed by 2x2 max pooling.
        return fluid.nets.img_conv_group(
            input=ipt,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max')

    # (filters, conv layers in the group, dropout rate per conv layer)
    block_specs = [
        (64, 2, [0.3, 0]),
        (128, 2, [0.4, 0]),
        (256, 3, [0.4, 0.4, 0]),
        (512, 3, [0.4, 0.4, 0]),
        (512, 3, [0.4, 0.4, 0]),
    ]
    features = input
    for num_filter, groups, dropouts in block_specs:
        features = conv_block(features, num_filter, groups, dropouts)

    dropped = fluid.layers.dropout(x=features, dropout_prob=0.5)
    hidden = fluid.layers.fc(input=dropped, size=512, act=None)
    normed = fluid.layers.batch_norm(input=hidden, act='relu')
    dropped_again = fluid.layers.dropout(x=normed, dropout_prob=0.5)
    return fluid.layers.fc(input=dropped_again, size=512, act=None)
|
||||
|
||||
|
||||
def main():
    """Build VGG16 for the selected dataset, train it and print metrics.

    Reads the module-level ``args``. At most ``args.iterations`` mini-batches
    are trained in total, counted across passes. After every pass the mean
    loss and accuracy of the batches trained in that pass are printed, plus
    the test accuracy when ``--with_test`` is set.
    """
    if args.data_set == "cifar10":
        classdim = 10
        if args.data_format == 'NCHW':
            data_shape = [3, 32, 32]
        else:
            data_shape = [32, 32, 3]
    else:
        classdim = 102
        if args.data_format == 'NCHW':
            data_shape = [3, 224, 224]
        else:
            data_shape = [224, 224, 3]

    # Input data
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    net = vgg16_bn_drop(images)
    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # inference program (cloned before the optimizer ops are appended)
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimization
    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    opts = optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # Initialize executor
    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    exe = fluid.Executor(place)

    # Parameter initialization
    exe.run(fluid.default_startup_program())

    # data reader
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.train10()
            if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
            buf_size=5120),
        batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.cifar.test10()
        if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
        batch_size=args.batch_size)

    def to_feed(data):
        # Convert one mini-batch of (image, label) samples into feed arrays.
        # List comprehensions instead of map(): map() is lazy on Python 3.
        img_data = np.array(
            [sample[0].reshape(data_shape) for sample in data]).astype(
                "float32")
        y_data = np.array([sample[1] for sample in data]).astype("int64")
        return {"pixel": img_data, "label": y_data.reshape([-1, 1])}

    # test
    def test(exe):
        test_accuracy = fluid.average.WeightedAverage()
        for batch_id, data in enumerate(test_reader()):
            acc, weight = exe.run(inference_program,
                                  feed=to_feed(data),
                                  fetch_list=[batch_acc, batch_size_tensor])
            test_accuracy.add(value=acc, weight=weight)
        return test_accuracy.eval()

    iters, num_samples, start_time = 0, 0, time.time()
    accuracy = fluid.average.WeightedAverage()
    for pass_id in range(args.pass_num):
        accuracy.reset()
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_reader()):
            if iters == args.skip_batch_num:
                # Warm-up batches are over: restart the timing window.
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break

            loss, acc, weight = exe.run(
                fluid.default_main_program(),
                feed=to_feed(data),
                fetch_list=[avg_cost, batch_acc, batch_size_tensor])
            accuracy.add(value=acc, weight=weight)
            iters += 1
            num_samples += len(data)
            # Record every batch so the per-pass means below cover the whole
            # pass; previously only the last batch of a pass was recorded.
            train_losses.append(loss)
            train_accs.append(acc)
            print(
                "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                (pass_id, iters, loss, acc)
            )  # The accuracy is the accumulation of batches, but not the current batch.

        if not train_losses:
            # No batch ran in this pass (iteration budget used up or empty
            # reader); further passes would only repeat stale numbers.
            break

        pass_train_acc = accuracy.eval()
        # NOTE(review): num_samples and train_elapsed are computed but not
        # reported anywhere in this function.
        train_elapsed = time.time() - start_time
        print("Pass: %d, Loss: %f, Train Accuray: %f\n" %
              (pass_id, np.mean(train_losses), np.mean(train_accs)))
        # evaluation
        if args.with_test:
            pass_test_acc = test(exe)
            # The test accuracy used to be computed and then discarded.
            print("Pass: %d, Test Accuracy: %f\n" % (pass_id, pass_test_acc))
|
||||
|
||||
|
||||
def print_arguments():
    """Print every option of the module-level ``args`` as ``name: value``.

    Options are printed in sorted-by-name order between two banner lines.
    """
    print('-----------  Configuration Arguments -----------')
    # items() exists on Python 2 and 3; iteritems() is Python 2 only.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
|
||||
|
||||
|
||||
# Script entry point: echo the configuration, then run the benchmark.
if __name__ == '__main__':
    print_arguments()
    main()
|
@ -1,5 +1,143 @@
|
||||
############################
|
||||
Install, Build and Unit test
|
||||
############################
|
||||
.. _install_faq:
|
||||
|
||||
TBD
|
||||
###############################
|
||||
Compile, Install, and Unit Test
|
||||
###############################
|
||||
|
||||
.. contents::
|
||||
|
||||
1. Insufficient CUDA driver version
|
||||
----------------------------------------------------------------
|
||||
|
||||
Many users usually face issues like `Cuda Error: CUDA driver version is insufficient for CUDA runtime version` when running the PaddlePaddle GPU Docker image. The cause is that you may not map the local CUDA driver to a container directory.
|
||||
You can solve the issue by running the following commands:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
|
||||
$ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
|
||||
$ docker run ${CUDA_SO} ${DEVICES} -it paddlepaddle/paddle:latest-gpu
|
||||
|
||||
For more information about Docker's installation and usage, please refer to `PaddlePaddle Docker documentation <http://www.paddlepaddle.org/docs/0.11.0/documentation/zh/getstarted/build_and_install/docker_install_en.html>`_ .
|
||||
|
||||
|
||||
2. Version mismatch between PythonLibs and PythonInterpreter
|
||||
----------------------------------------------------------------
|
||||
|
||||
This is a common problem when CMake looks up Python. If multiple versions of Python are installed, CMake may pick a PythonLibs version that does not match the PythonInterpreter version. In that case, specify the Python version explicitly, as follows.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cmake .. -DPYTHON_EXECUTABLE=<exc_path> -DPYTHON_LIBRARY=<lib_path> -DPYTHON_INCLUDE_DIR=<inc_path>
|
||||
|
||||
You should specify ``<exc_path>``, ``<lib_path>``, ``<inc_path>`` to your local paths.
|
||||
|
||||
3. PaddlePaddle version is 0.0.0
|
||||
------------------------------------------------
|
||||
This issue would happen when you run the code `paddle version` or `cmake ..`
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
CMake Warning at cmake/version.cmake:20 (message):
|
||||
Cannot add paddle version from git tag
|
||||
|
||||
You should pull all remote branches to your local machine with the command :code:`git fetch upstream` and then run :code:`cmake`
|
||||
|
||||
4. paddlepaddle\*.whl is not a supported wheel on this platform.
|
||||
------------------------------------------------------------------------
|
||||
|
||||
The primary cause of this issue is that pip cannot find a PaddlePaddle installation package that matches your current system. The latest PaddlePaddle Python installation package supports Linux x86_64 and MacOS 10.12 with Python 2.7 and pip 9.0.1.
|
||||
|
||||
You can upgrade Pip with the following command\:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install --upgrade pip
|
||||
|
||||
If it does not work for you, you can run the command :code:`python -c "import pip; print(pip.pep425tags.get_supported())"` to get the suffix of Python package which your system may support and then compare it with the suffix of your installation.
|
||||
|
||||
If the system supports :code:`linux_x86_64` and the installation package is :code:`manylinux1_x86_64`, you should upgrade pip to the latest version.
|
||||
|
||||
If the system supports :code:`manylinux1_x86_64` and the local installation package is :code:`linux_x86_64`, you can rename the whl package to :code:`manylinux1_x86_64` and then try again.
|
||||
|
||||
|
||||
5. ImportError: No module named v2
|
||||
----------------------------------
|
||||
Please uninstall Paddle V1 if you have installed it before.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip uninstall py_paddle paddle
|
||||
|
||||
Then install the PaddlePaddle Python packages: enter the build directory and run the following command.
|
||||
|
||||
pip install python/dist/paddle*.whl && pip install ../paddle/dist/py_paddle*.whl
|
||||
|
||||
6. Illegal instruction
|
||||
-----------------------
|
||||
This issue may be caused by using the wrong PaddlePaddle binary: some builds use AVX SIMD instructions to speed up CPU computation, and they may fail if your CPU does not support those instructions. Please choose the version that matches your CPU.
|
||||
|
||||
7. Python unittest fails
|
||||
--------------------------------
|
||||
|
||||
If the following python unittest testcases fail:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
24 - test_PyDataProvider (Failed)
|
||||
26 - test_RecurrentGradientMachine (Failed)
|
||||
27 - test_NetworkCompare (Failed)
|
||||
28 - test_PyDataProvider2 (Failed)
|
||||
32 - test_Prediction (Failed)
|
||||
33 - test_Compare (Failed)
|
||||
34 - test_Trainer (Failed)
|
||||
35 - test_TrainerOnePass (Failed)
|
||||
36 - test_CompareTwoNets (Failed)
|
||||
37 - test_CompareTwoOpts (Failed)
|
||||
38 - test_CompareSparse (Failed)
|
||||
39 - test_recurrent_machine_generation (Failed)
|
||||
40 - test_PyDataProviderWrapper (Failed)
|
||||
41 - test_config_parser (Failed)
|
||||
42 - test_swig_api (Failed)
|
||||
43 - layers_test (Failed)
|
||||
|
||||
Please check the PaddlePaddle unittest logs which may suggest the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
paddle package is already in your PYTHONPATH. But unittest need a clean environment.
|
||||
Please uninstall paddle package before start unittest. Try to 'pip uninstall paddle'.
|
||||
|
||||
The solution is:
|
||||
|
||||
* Remove the old PaddlePaddle installation to get a clean environment for the unit tests. If a PaddlePaddle package is already in Python's site-packages, the unit tests will use that package instead of the Python package in the :code:`/python` directory of the source tree. Setting :code:`PYTHONPATH` to :code:`/python` does not help either, because Python's search path gives priority to the installed package.
|
||||
|
||||
|
||||
8. Failed to download the MKLML library
|
||||
----------------------------------------------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
make[2]: *** [third_party/mklml/src/extern_mklml-stamp/extern_mklml-download] error 4
|
||||
make[1]: *** [CMakeFiles/extern_mklml.dir/all] error 2
|
||||
make[1]: *** waiting for the unfinished jobs....
|
||||
|
||||
Cause: The network speed or SSL link causes the MKLML library to download unsuccessfully.
|
||||
|
||||
The solution is: manually download and install, the specific steps are as follows.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
// 1. enter the directory
|
||||
cd build/third_party/mklml/src/extern_mklml
|
||||
|
||||
// 2. check the size of the package, normally 75M, if less than 75M, the download fails
|
||||
du -sh mklml_lnx_2018.0.1.20171007.tgz
|
||||
|
||||
// 3. manually download and unzip and make the download success tag:
|
||||
wget --no-check-certificate https://github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz -c -O mklml_lnx_2018.0.1.20171007.tgz
|
||||
tar zxf mklml_lnx_2018.0.1.20171007.tgz
|
||||
touch ../extern_mklml-stamp/extern_mklml-download
|
||||
|
||||
// 4. then compile
|
||||
|
||||
|
@ -0,0 +1,62 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include <unistd.h>
|
||||
#include <string>
|
||||
#include <thread> // NOLINT
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "paddle/fluid/operators/detail/grpc_client.h"
|
||||
#include "paddle/fluid/operators/detail/grpc_server.h"
|
||||
|
||||
namespace framework = paddle::framework;
|
||||
namespace platform = paddle::platform;
|
||||
namespace detail = paddle::operators::detail;
|
||||
|
||||
std::unique_ptr<detail::AsyncGRPCServer> rpc_service_;
|
||||
|
||||
void StartServer(const std::string& endpoint) {
|
||||
rpc_service_.reset(new detail::AsyncGRPCServer(endpoint));
|
||||
rpc_service_->RunSyncUpdate();
|
||||
}
|
||||
|
||||
// Starts a local gRPC server on a background thread, issues one asynchronous
// prefetch request for variable "in" into variable "out", waits for the
// client, then shuts the server down.
TEST(PREFETCH, CPU) {
  // start up a server instance backend
  // TODO(Yancey1989): Need to start a server with optimize blocks and
  // prefetch blocks.
  const std::string endpoint = "127.0.0.1:8889";
  std::thread server_thread(StartServer, endpoint);

  framework::Scope scope;
  platform::CPUPlace place;
  platform::CPUDeviceContext ctx(place);

  // create var on local scope
  std::string in_var_name = "in";
  std::string out_var_name = "out";
  auto* in_tensor =
      scope.Var(in_var_name)->GetMutable<framework::LoDTensor>();
  in_tensor->Resize({10, 10});
  VLOG(3) << "before mutable_data";
  in_tensor->mutable_data<int>(place);

  // The output variable only has to exist; nothing is written to it here.
  scope.Var(out_var_name);

  VLOG(3) << "before fetch";
  detail::RPCClient client;
  client.AsyncPrefetchVariable(endpoint, ctx, scope, in_var_name,
                               out_var_name);
  client.Wait();

  // Tear down in order: stop the server, join its thread, drop the object.
  rpc_service_->ShutDown();
  server_thread.join();
  rpc_service_.reset(nullptr);
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue