Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into change_manylinux1_Docker
commit ba84a6b7ed
@@ -0,0 +1,31 @@
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04

# Setting UBUNTU_MIRROR can speed up apt-get.
# ARG UBUNTU_MIRROR
# RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'

RUN apt-get update && apt-get install -y python python-pip iputils-ping libgtk2.0-dev wget vim net-tools iftop python-opencv
RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so && ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/libnccl.so

# IMPORTANT:
# Add "ENV http_proxy=http://ip:port" if your download is slow, and don't forget to unset it at runtime.
# example: unset http_proxy && unset https_proxy && python fluid_benchmark.py ...

RUN pip install -U pip
RUN pip install -U kubernetes paddlepaddle

RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()\npaddle.dataset.flowers.fetch()" | python'
RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.mnist.train()\npaddle.dataset.mnist.test()\npaddle.dataset.imdb.fetch()" | python'
RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.imikolov.fetch()" | python'
RUN pip uninstall -y paddlepaddle && mkdir /workspace

ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
RUN chmod +x /usr/bin/paddle_k8s

ADD *.whl /
RUN pip install /*.whl && rm -f /*.whl

ENV LD_LIBRARY_PATH=/usr/local/lib
ADD fluid_benchmark.py recordio_converter.py args.py run.sh run_fluid_benchmark.sh /workspace/
ADD models/ /workspace/models/
@@ -0,0 +1,134 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

__all__ = ['parse_args', ]

BENCHMARK_MODELS = [
    "machine_translation", "resnet", "vgg", "mnist", "stacked_dynamic_lstm"
]


def parse_args():
    parser = argparse.ArgumentParser('Fluid model benchmarks.')
    parser.add_argument(
        '--model',
        type=str,
        choices=BENCHMARK_MODELS,
        default='resnet',
        help='The model to run benchmark with.')
    parser.add_argument(
        '--batch_size', type=int, default=32, help='The minibatch size.')
    # args related to learning rate
    parser.add_argument(
        '--learning_rate', type=float, default=0.001, help='The learning rate.')
    # TODO(wuyi): add "--use_fake_data" option back.
    parser.add_argument(
        '--skip_batch_num',
        type=int,
        default=5,
        help='The number of initial minibatches to skip, for better performance measurement.'
    )
    parser.add_argument(
        '--iterations', type=int, default=80, help='The number of minibatches.')
    parser.add_argument(
        '--pass_num', type=int, default=100, help='The number of passes.')
    parser.add_argument(
        '--data_format',
        type=str,
        default='NCHW',
        choices=['NCHW', 'NHWC'],
        help='The data format; currently only NCHW is supported.')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    parser.add_argument(
        '--gpus',
        type=int,
        default=1,
        help='If gpus > 1, will use ParallelExecutor to run, else use Executor.')
    # this option is available only for vgg and resnet.
    parser.add_argument(
        '--cpus',
        type=int,
        default=1,
        help='If cpus > 1, will use ParallelDo to run, else use Executor.')
    parser.add_argument(
        '--data_set',
        type=str,
        default='flowers',
        choices=['cifar10', 'flowers'],
        help='Optional dataset for benchmark.')
    parser.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')
    parser.add_argument(
        '--use_cprof', action='store_true', help='If set, use cProfile.')
    parser.add_argument(
        '--use_nvprof',
        action='store_true',
        help='If set, use nvprof for CUDA.')
    parser.add_argument(
        '--no_test',
        action='store_true',
        help='If set, do not test the testset during training.')
    parser.add_argument(
        '--memory_optimize',
        action='store_true',
        help='If set, optimize runtime memory before start.')
    parser.add_argument(
        '--use_fake_data',
        action='store_true',
        help='If set, omit the actual data-reading operators.')
    parser.add_argument(
        '--profile', action='store_true', help='If set, profile a few steps.')
    parser.add_argument(
        '--update_method',
        type=str,
        default='local',
        choices=['local', 'pserver', 'nccl2'],
        help='Choose parameter update method, can be local, pserver, nccl2.')
    parser.add_argument(
        '--no_split_var',
        action='store_true',
        default=False,
        help='Whether to split variables into blocks when update_method is pserver.')
    parser.add_argument(
        '--async_mode',
        action='store_true',
        default=False,
        help='Whether to start the pserver in async mode to support ASGD.')
    parser.add_argument(
        '--use_reader_op',
        action='store_true',
        help='Whether to use the reader op; the data path must be specified if this is set.'
    )
    parser.add_argument(
        '--data_path',
        type=str,
        default="",
        help='Directory that contains all the training recordio files.')
    parser.add_argument(
        '--use_inference_transpiler',
        action='store_true',
        help='If set, use the inference transpiler to optimize the program.')
    parser.add_argument(
        '--no_random',
        action='store_true',
        help='If set, keep the random seed and do not shuffle the data.')
    args = parser.parse_args()
    return args
File diff suppressed because it is too large
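
A minimal sketch of how these flags might be consumed by a driver script (hypothetical usage; the actual consumer is presumably fluid_benchmark.py, whose diff is suppressed above):

# Hypothetical driver sketch; assumes args.py is importable from the working directory.
from args import parse_args


def main():
    args = parse_args()
    # e.g. invoked as: python driver.py --model resnet --device GPU --gpus 2 --batch_size 64
    print('model=%s device=%s gpus=%d batch_size=%d update_method=%s' %
          (args.model, args.device, args.gpus, args.batch_size,
           args.update_method))


if __name__ == '__main__':
    main()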
@@ -0,0 +1,164 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import random
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.dataset import mnist, cifar, flowers, image


def convert_2_recordio(py_reader, outfilepath, batch_size, shape_data,
                       shape_label):
    num_batches = 0
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        reader = paddle.batch(py_reader(), batch_size=batch_size)
        feeder = fluid.DataFeeder(
            feed_list=[  # order is image and label
                fluid.layers.data(
                    name='image', shape=shape_data),
                fluid.layers.data(
                    name='label', shape=shape_label, dtype='int64'),
            ],
            place=fluid.CPUPlace())
        num_batches = fluid.recordio_writer.convert_reader_to_recordio_file(
            outfilepath, reader, feeder)
    return num_batches


def prepare_mnist(outpath, batch_size):
    outfilepath = os.path.join(outpath, "mnist.recordio")
    convert_2_recordio(mnist.train, outfilepath, batch_size, [784], [1])


def prepare_cifar10(outpath, batch_size):
    outfilepath = os.path.join(outpath, "cifar.recordio")
    convert_2_recordio(cifar.train10, outfilepath, batch_size, [3, 32, 32], [1])


def prepare_flowers(outpath, batch_size):
    outfilepath = os.path.join(outpath, "flowers.recordio")
    convert_2_recordio(flowers.train, outfilepath, batch_size, [3, 224, 224],
                       [1])


def default_mapper(sample):
    img, label = sample
    img = image.simple_transform(
        img, 256, 224, True, mean=[103.94, 116.78, 123.68])
    return img.flatten().astype('float32'), label


def imagenet_train(data_dir):
    contents = os.listdir(data_dir)
    if set(contents) != set(
            ["train", "train.txt", "val", "val_set", "val.txt", "unzip.sh"]):
        raise Exception("Imagenet data contents error!")
    img2label = dict()
    imgfilelist = []
    with open(os.path.join(data_dir, "train.txt")) as fn:
        while 1:
            l = fn.readline()
            if not l:
                break
            img, lbl = l[:-1].split(" ")
            img2label[img] = int(lbl)
            imgfilelist.append(img)
    # shuffle all, this is slow
    random.shuffle(imgfilelist)

    def train_reader():
        for idx, imgfile in enumerate(imgfilelist):
            data = image.load_image(
                os.path.join(data_dir, "train", imgfile.lower()))
            label = [img2label[imgfile], ]
            yield [data, label]

    return paddle.reader.map_readers(default_mapper, train_reader)


def imagenet_test(data_dir):
    contents = os.listdir(data_dir)
    if set(contents) != set(
            ["train", "train.txt", "val", "val_set", "val.txt", "unzip.sh"]):
        raise Exception("Imagenet data contents error!")
    img2label = dict()
    imgfilelist = []
    with open(os.path.join(data_dir, "val.txt")) as fn:
        while 1:
            l = fn.readline()
            if not l:
                break
            img, lbl = l[:-1].split(" ")
            img2label[img] = int(lbl)
            imgfilelist.append(img)

    def test_reader():
        for idx, imgfile in enumerate(imgfilelist):
            base_path = os.path.join(data_dir, "val", imgfile.split(".")[0])
            image_path = ".".join([base_path, "jpeg"])
            data = image.load_image(image_path)
            label = [img2label[imgfile], ]
            yield [data, label]

    return paddle.reader.map_readers(default_mapper, test_reader)


# FIXME(wuyi): delete this when https://github.com/PaddlePaddle/Paddle/pull/11066 is merged
def convert_reader_to_recordio_files(
        filename,
        batch_per_file,
        reader_creator,
        feeder,
        compressor=core.RecordIOWriter.Compressor.Snappy,
        max_num_records=1000,
        feed_order=None):
    if feed_order is None:
        feed_order = feeder.feed_names
    f_name, f_ext = os.path.splitext(filename)
    assert (f_ext == ".recordio")

    lines = []
    f_idx = 0
    counter = 0
    for idx, batch in enumerate(reader_creator()):
        lines.append(batch)
        if idx >= batch_per_file and idx % batch_per_file == 0:
            filename = "%s-%05d%s" % (f_name, f_idx, f_ext)
            with fluid.recordio_writer.create_recordio_writer(
                    filename, compressor, max_num_records) as writer:
                for l in lines:
                    res = feeder.feed(l)
                    for each in feed_order:
                        writer.append_tensor(res[each])
                    writer.complete_append_tensor()
                    counter += 1
                lines = []
                f_idx += 1
            print("written file: ", filename)
    return counter


def prepare_imagenet(inpath, outpath, batch_size):
    r = paddle.batch(imagenet_train(inpath), batch_size=batch_size)
    feeder = fluid.DataFeeder(
        feed_list=[
            fluid.layers.data(
                name="image", shape=[3, 224, 224]), fluid.layers.data(
                    name="label", shape=[1], dtype='int64')
        ],
        place=fluid.CPUPlace())
    outpath = os.path.join(outpath, "imagenet.recordio")
    convert_reader_to_recordio_files(outpath, 10000, r, feeder)
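
A minimal sketch of how the prepare_* helpers above could be invoked to generate the recordio files that --use_reader_op/--data_path expect (the output directory name and batch sizes here are assumptions, not part of this diff):

# Hypothetical usage sketch: writes mnist.recordio and flowers.recordio into
# an assumed ./recordio_data directory; batch sizes are illustrative only.
import os

from recordio_converter import prepare_flowers, prepare_mnist

outpath = "./recordio_data"  # assumed output directory
if not os.path.exists(outpath):
    os.makedirs(outpath)
prepare_mnist(outpath, batch_size=32)
prepare_flowers(outpath, batch_size=32)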
@@ -0,0 +1,9 @@
#!/bin/bash

PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model resnet --device CPU --update_method pserver --iterations=10000 &

sleep 15

CUDA_VISIBLE_DEVICES=0,1 PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model resnet --device GPU --update_method pserver --iterations=10000 --gpus 2 &

CUDA_VISIBLE_DEVICES=2,3 PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=1 python fluid_benchmark.py --model resnet --device GPU --update_method pserver --iterations=10000 --gpus 2 &
Some files were not shown because too many files have changed in this diff.