remove benchmark folder, since there is a benchmark repo already; distributed benchmarks will be maintained in the fleet repo (#18537)
test=develop
@@ -1,12 +0,0 @@
paddle/image/logs
paddle/image/*.pyc
paddle/image/train.list
paddle/rnn/logs
paddle/rnn/*.pyc
paddle/rnn/imdb.pkl
caffe/image/logs
tensorflow/image/logs
tensorflow/rnn/logs
fluid/models/*.pyc
fluid/logs
fluid/nohup.out
@@ -1,30 +0,0 @@
set -e

function test() {
  cfg=$1
  batch=$2
  prefix=$3
  sed -i "/input: \"data\"/{n;s/^input_dim.*/input_dim: $batch/g}" $cfg
  sed -i "/input: \"label\"/{n;s/^input_dim.*/input_dim: $batch/g}" $cfg
  caffe time --model=$cfg --iterations=50 --gpu 0 > logs/$prefix-1gpu-batch${batch}.log 2>&1
}

if [ ! -d "logs" ]; then
  mkdir logs
fi

# alexnet
test alexnet.prototxt 64 alexnet
test alexnet.prototxt 128 alexnet
test alexnet.prototxt 256 alexnet
test alexnet.prototxt 512 alexnet

# googlenet
test googlenet.prototxt 64 googlenet
test googlenet.prototxt 128 googlenet

# small net
test smallnet_mnist_cifar.prototxt 64 smallnet
test smallnet_mnist_cifar.prototxt 128 smallnet
test smallnet_mnist_cifar.prototxt 256 smallnet
test smallnet_mnist_cifar.prototxt 512 smallnet
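A quick way to confirm the `sed` edit above took effect (illustrative; `alexnet.prototxt` is one of the configs the script patches):

```bash
# the line after `input: "data"` should now carry the requested batch size
grep -A1 'input: "data"' alexnet.prototxt
```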
@@ -1,24 +0,0 @@
#!/bin/bash
set -e

function test() {
  cfg=$1
  batch=$2
  prefix=$3
  batch_per_gpu=`expr ${batch} / 4`
  sed -i "/input: \"data\"/{n;s/^input_dim.*/input_dim: ${batch_per_gpu}/g}" $cfg
  sed -i "/input: \"label\"/{n;s/^input_dim.*/input_dim: ${batch_per_gpu}/g}" $cfg
  sed -i "1c\net : \"${cfg}\"" solver.prototxt
  caffe train --solver=solver.prototxt -gpu 0,1,2,3 > logs/${prefix}-4gpu-batch${batch}.log 2>&1
}

if [ ! -d "logs" ]; then
  mkdir logs
fi

# alexnet
test alexnet.prototxt 512 alexnet
test alexnet.prototxt 1024 alexnet

# googlenet
test googlenet.prototxt 512 googlenet
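A quick check of the per-device batch arithmetic used above (the global batch is split evenly across the 4 GPUs):

```bash
# global batch 512 across 4 GPUs -> 128 per device
batch=512
echo "per-GPU batch: $(expr ${batch} / 4)"   # prints: per-GPU batch: 128
```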
@@ -1,198 +0,0 @@
name: "mnist/cifar"
input: "data"
input_dim: 128
input_dim: 3
input_dim: 32
input_dim: 32
input: "label"
input_dim: 128
input_dim: 1
input_dim: 1
input_dim: 1
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 32
    pad: 2
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.0001
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "pool1"
  top: "pool1"
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 32
    pad: 2
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: AVE
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 64
    pad: 2
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3"
  top: "pool3"
  pooling_param {
    pool: AVE
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool3"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 64
    weight_filler {
      type: "gaussian"
      std: 0.1
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "gaussian"
      std: 0.1
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "ip2"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}
@@ -1,10 +0,0 @@
net: "alexnet.prototxt"
base_lr: 0.01
lr_policy: "fixed"
display: 20
max_iter: 200
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "models/caffe_alexnet_train"
solver_mode: GPU
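Note that the multi-GPU script above swaps the benchmarked network by rewriting line 1 of this solver file; a minimal sketch of that step:

```bash
# repoint the solver at googlenet, then verify the first line
sed -i '1c\net : "googlenet.prototxt"' solver.prototxt
head -1 solver.prototxt   # -> net : "googlenet.prototxt"
```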
[12 benchmark result images removed (14 KiB – 115 KiB each)]
@@ -1,30 +0,0 @@
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04

# Using UBUNTU_MIRROR can speed up apt-get.
# ARG UBUNTU_MIRROR
# RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'

RUN apt-get update && apt-get install -y python python-pip iputils-ping libgtk2.0-dev wget vim net-tools iftop python-opencv
RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so && ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/libnccl.so

# IMPORTANT:
# Add "ENV http_proxy=http://ip:port" if your download is slow, and don't forget to unset it at runtime.
# example: unset http_proxy && unset https_proxy && python fluid_benchmark.py ...

RUN pip install -U pip
RUN pip install -U kubernetes paddlepaddle

# install paddlepaddle to pull in its dependencies, then remove it so the
# local whl added below takes its place
RUN pip uninstall -y paddlepaddle && mkdir /workspace

ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
RUN chmod +x /usr/bin/paddle_k8s

ADD *.whl /
RUN pip install /*.whl && rm -f /*.whl

ENV LD_LIBRARY_PATH=/usr/local/lib
ADD fluid_benchmark.py recordio_converter.py args.py run.sh run_fluid_benchmark.sh imagenet_reader.py /workspace/
ADD models/ /workspace/models/
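A hedged usage sketch for this Dockerfile (the image name and tag are placeholders):

```bash
# build the image, then smoke-test the benchmark entry point inside it
docker build -t fluid-benchmark:dev .
docker run --rm fluid-benchmark:dev python /workspace/fluid_benchmark.py --help
```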
@@ -1,99 +0,0 @@
# Fluid Benchmark

This directory contains several model configurations and tools used to run
Fluid benchmarks for local and distributed training.

## Run the Benchmark

To start, run the following command to get the full help message:

```bash
python fluid_benchmark.py --help
```

Currently supported `--model` arguments include:

* mnist
* resnet
  * you can choose a different dataset using `--data_set cifar10` or
    `--data_set flowers` (see the example after this list).
* vgg
* stacked_dynamic_lstm
* machine_translation

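For example, to benchmark resnet on cifar10 instead of the default flowers dataset (flag values are illustrative):

```bash
python fluid_benchmark.py --model resnet --data_set cifar10 --device GPU
```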
* Run the following command to start a benchmark job locally:
  ```bash
  python fluid_benchmark.py --model mnist --device GPU
  ```
  You can choose GPU or CPU training. The parameter server can run in async
  mode: specify `--async_mode` to train the model asynchronously. With GPU
  training, you can specify `--gpus <gpu_num>` to run multi-GPU training,
  as in the sketch below.
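A sketch of a local data-parallel run on 4 GPUs (the GPU count is illustrative):

```bash
python fluid_benchmark.py --model resnet --device GPU --gpus 4
```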
* Run distributed training with parameter servers:
  * see [run_fluid_benchmark.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/fluid/run_fluid_benchmark.sh) as an example.
  * start parameter servers:
    ```bash
    PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver
    sleep 15
    ```
  * start trainers (note that only `PADDLE_TRAINING_ROLE` changes):
    ```bash
    PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver
    ```
* Run distributed training using NCCL2:
  ```bash
  PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method nccl2
  ```

## Prepare the RecordIO File to Achieve Better Performance

Running the following command generates RecordIO files like "mnist.recordio" under the given path
with the batch_size you choose; use batch_size=1 so that a later reader can change the batch size
at any time using `fluid.batch`.

```bash
python -c 'from recordio_converter import *; prepare_mnist("data", 1)'
```

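Once generated, the RecordIO files can be consumed through the reader op; a hedged sketch (the path matches the command above, other flags are illustrative):

```bash
python fluid_benchmark.py --model mnist --device GPU --use_reader_op --data_path data
```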
## Run Distributed Benchmark on Kubernetes Cluster

You may need to build a Docker image before submitting a cluster job onto Kubernetes; otherwise you will
have to start all those processes manually on each node, which is not recommended.

To build the Docker image, you need to choose a paddle "whl" package to run with. You may either
download one from
http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_en.html or
build one yourself. Once you have the "whl" package, put it under the current directory and run:

```bash
docker build -t [your docker image name]:[your docker image tag] .
```

Then push the image to a Docker registry that your Kubernetes cluster can reach.
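For example (registry, image name, and tag are placeholders):

```bash
docker tag myimage:latest registry.example.com/myimage:latest
docker push registry.example.com/myimage:latest
```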

We provide a script, `kube_gen_job.py`, to generate Kubernetes yaml files that submit
distributed benchmark jobs to your cluster. To generate a job yaml, just run:

```bash
python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --gpus 8 --device GPU --update_method pserver " --disttype pserver
```

The yaml files are then generated under the directory `myjob`, and you can run:

```bash
kubectl create -f myjob/
```

The job should then start.
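To confirm the pserver and trainer pods came up (job name as in the example above):

```bash
kubectl get pods | grep myjob
```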

## Notes on Running Fluid Distributed Training with NCCL2 and RDMA

Before running NCCL2 distributed jobs, check whether your nodes have multiple network
interfaces; if they do, set the environment variable `export NCCL_SOCKET_IFNAME=eth0`
(substituting your actual network device) so NCCL uses the right one.

To run high-performance distributed training, you must prepare your hardware environment to
support RDMA-enabled network communication; see [this](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/howto/cluster/nccl2_rdma_training.md)
note for details.
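A short sketch of pinning NCCL to a specific interface before launching (the interface name `eth0` is an assumption; check your own first):

```bash
ip -brief addr show                    # find the interface carrying cluster traffic
export NCCL_SOCKET_IFNAME=eth0         # pin NCCL to it
python fluid_benchmark.py --model mnist --device GPU --update_method nccl2
```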
@@ -1,151 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

__all__ = ['parse_args', ]

BENCHMARK_MODELS = [
    "machine_translation", "resnet", "se_resnext", "vgg", "mnist",
    "stacked_dynamic_lstm", "resnet_with_preprocess"
]


def parse_args():
    parser = argparse.ArgumentParser('Fluid model benchmarks.')
    parser.add_argument(
        '--model',
        type=str,
        choices=BENCHMARK_MODELS,
        default='resnet',
        help='The model to run benchmark with.')
    parser.add_argument(
        '--batch_size', type=int, default=32, help='The minibatch size.')
    # args related to learning rate
    parser.add_argument(
        '--learning_rate', type=float, default=0.001, help='The learning rate.')
    # TODO(wuyi): add "--use_fake_data" option back.
    parser.add_argument(
        '--skip_batch_num',
        type=int,
        default=5,
        help='The number of initial minibatches to skip, for better performance tests.'
    )
    parser.add_argument(
        '--iterations', type=int, default=80, help='The number of minibatches.')
    parser.add_argument(
        '--pass_num', type=int, default=100, help='The number of passes.')
    parser.add_argument(
        '--data_format',
        type=str,
        default='NCHW',
        choices=['NCHW', 'NHWC'],
        help='The data format; currently only NCHW is supported.')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    parser.add_argument(
        '--gpus',
        type=int,
        default=1,
        help='If gpus > 1, will use ParallelExecutor to run, else use Executor.')
    # this option is available only for vgg and resnet.
    parser.add_argument(
        '--cpus',
        type=int,
        default=1,
        help='If cpus > 1, will set ParallelExecutor to use multiple threads.')
    parser.add_argument(
        '--data_set',
        type=str,
        default='flowers',
        choices=['cifar10', 'flowers', 'imagenet'],
        help='Optional dataset for benchmark.')
    parser.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')
    parser.add_argument(
        '--use_cprof', action='store_true', help='If set, use cProfile.')
    parser.add_argument(
        '--use_nvprof',
        action='store_true',
        help='If set, use nvprof for CUDA.')
    parser.add_argument(
        '--no_test',
        action='store_true',
        help='If set, do not test the testset during training.')
    parser.add_argument(
        '--memory_optimize',
        action='store_true',
        help='If set, optimize runtime memory before start.')
    parser.add_argument(
        '--use_fake_data',
        action='store_true',
        help='If set, omit the actual read data operators.')
    parser.add_argument(
        '--profile', action='store_true', help='If set, profile a few steps.')
    parser.add_argument(
        '--update_method',
        type=str,
        default='local',
        choices=['local', 'pserver', 'nccl2'],
        help='Choose parameter update method, can be local, pserver, nccl2.')
    parser.add_argument(
        '--no_split_var',
        action='store_true',
        default=False,
        help='Whether to split variables into blocks when update_method is pserver.')
    parser.add_argument(
        '--async_mode',
        action='store_true',
        default=False,
        help='Whether to start pserver in async mode to support ASGD.')
    parser.add_argument(
        '--use_reader_op',
        action='store_true',
        help='Whether to use reader op; the data path must be specified if set.'
    )
    parser.add_argument(
        '--data_path',
        type=str,
        default="",
        help='Directory that contains all the training recordio files.')
    parser.add_argument(
        '--test_data_path',
        type=str,
        default="",
        help='Directory that contains all the test data (NOT recordio).')
    parser.add_argument(
        '--use_inference_transpiler',
        action='store_true',
        help='If set, use inference transpiler to optimize the program.')
    parser.add_argument(
        '--no_random',
        action='store_true',
        help='If set, keep the random seed and do not shuffle the data.')
    parser.add_argument(
        '--reduce_strategy',
        type=str,
        choices=['reduce', 'all_reduce'],
        default='all_reduce',
        help='Specify the reduce strategy, can be reduce, all_reduce.')
    parser.add_argument(
        '--fuse_broadcast_op',
        action='store_true',
        help='If set, fuse multiple broadcast operators into one fused_broadcast operator.'
    )
    args = parser.parse_args()
    return args
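An illustrative invocation exercising several of the flags defined above (all values are examples only):

```bash
python fluid_benchmark.py \
    --model resnet --batch_size 64 --device GPU --gpus 4 \
    --data_set cifar10 --update_method local --no_test
```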