Remove the benchmark folder: a separate benchmark repo already exists, and the distributed benchmark will be maintained in the fleet repo (#18537)
test=develop
@ -1,12 +0,0 @@
paddle/image/logs
paddle/image/*.pyc
paddle/image/train.list
paddle/rnn/logs
paddle/rnn/*.pyc
paddle/rnn/imdb.pkl
caffe/image/logs
tensorflow/image/logs
tensorflow/rnn/logs
fluid/models/*.pyc
fluid/logs
fluid/nohup.out
@ -1,30 +0,0 @@
set -e

# Rewrite the batch size in a Caffe prototxt, then time a 50-iteration
# run on one GPU, logging to logs/<prefix>-1gpu-batch<batch>.log.
function test() {
  cfg=$1
  batch=$2
  prefix=$3
  # Replace the input_dim line (the batch dimension) that follows each
  # input declaration with the requested batch size.
  sed -i "/input: \"data\"/{n;s/^input_dim.*/input_dim: $batch/g}" $cfg
  sed -i "/input: \"label\"/{n;s/^input_dim.*/input_dim: $batch/g}" $cfg
  caffe time --model=$cfg --iterations=50 --gpu 0 > logs/$prefix-1gpu-batch${batch}.log 2>&1
}

if [ ! -d "logs" ]; then
  mkdir logs
fi

# alexnet
test alexnet.prototxt 64 alexnet
test alexnet.prototxt 128 alexnet
test alexnet.prototxt 256 alexnet
test alexnet.prototxt 512 alexnet

# googlenet
test googlenet.prototxt 64 googlenet
test googlenet.prototxt 128 googlenet

# small net
test smallnet_mnist_cifar.prototxt 64 smallnet
test smallnet_mnist_cifar.prototxt 128 smallnet
test smallnet_mnist_cifar.prototxt 256 smallnet
test smallnet_mnist_cifar.prototxt 512 smallnet
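The two `sed` one-liners are the core of this harness: each matches an `input:` declaration and rewrites the `input_dim` line immediately after it (the batch dimension). A minimal sketch of the effect, using the `smallnet_mnist_cifar.prototxt` header shown later in this diff:

```bash
# Rewrite the batch dimension of the "data" input to 256, in place.
sed -i '/input: "data"/{n;s/^input_dim.*/input_dim: 256/g}' smallnet_mnist_cifar.prototxt
grep -A1 'input: "data"' smallnet_mnist_cifar.prototxt
# input: "data"
# input_dim: 256
```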
@ -1,24 +0,0 @@
#!/bin/bash
set -e

# Same idea as the single-GPU script, but split the global batch across
# 4 GPUs and run caffe train with the shared solver.prototxt.
function test() {
  cfg=$1
  batch=$2
  prefix=$3
  batch_per_gpu=`expr ${batch} / 4`
  sed -i "/input: \"data\"/{n;s/^input_dim.*/input_dim: ${batch_per_gpu}/g}" $cfg
  sed -i "/input: \"label\"/{n;s/^input_dim.*/input_dim: ${batch_per_gpu}/g}" $cfg
  # Point line 1 of solver.prototxt at the network under test.
  sed -i "1c\net : \"${cfg}\"" solver.prototxt
  caffe train --solver=solver.prototxt -gpu 0,1,2,3 > logs/${prefix}-4gpu-batch${batch}.log 2>&1
}

if [ ! -d "logs" ]; then
  mkdir logs
fi

# alexnet
test alexnet.prototxt 512 alexnet
test alexnet.prototxt 1024 alexnet

# googlenet
test googlenet.prototxt 512 googlenet
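The `1c\` command above replaces the first line of `solver.prototxt` in place, so the solver always points at the network being benchmarked. A small sketch of the effect:

```bash
sed -i '1c\net : "googlenet.prototxt"' solver.prototxt
head -1 solver.prototxt
# net : "googlenet.prototxt"
```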
@ -1,198 +0,0 @@
name: "mnist/cifar"
input: "data"
input_dim: 128
input_dim: 3
input_dim: 32
input_dim: 32
input: "label"
input_dim: 128
input_dim: 1
input_dim: 1
input_dim: 1
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 32
    pad: 2
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.0001
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "pool1"
  top: "pool1"
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 32
    pad: 2
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: AVE
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 64
    pad: 2
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3"
  top: "pool3"
  pooling_param {
    pool: AVE
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool3"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 64
    weight_filler {
      type: "gaussian"
      std: 0.1
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "gaussian"
      std: 0.1
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "ip2"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}
@ -1,10 +0,0 @@
net: "alexnet.prototxt"
base_lr: 0.01
lr_policy: "fixed"
display: 20
max_iter: 200
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "models/caffe_alexnet_train"
solver_mode: GPU
[12 image files deleted from the benchmark folder; only their sizes (14–115 KiB) were recorded in the diff]
@ -1,30 +0,0 @@
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04

# Using UBUNTU_MIRROR can speed up apt-get.
# ARG UBUNTU_MIRROR
# RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'

RUN apt-get update && apt-get install -y python python-pip iputils-ping libgtk2.0-dev wget vim net-tools iftop python-opencv
RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so && ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/libnccl.so

# IMPORTANT:
# Add "ENV http_proxy=http://ip:port" if your download is slow, and don't forget to unset it at runtime.
# example: unset http_proxy && unset https_proxy && python fluid_benchmark.py ...

RUN pip install -U pip
RUN pip install -U kubernetes paddlepaddle

# Installing paddlepaddle pulls in its dependencies; uninstalling removes only
# the package itself, so the locally built whl added below takes its place.
RUN pip uninstall -y paddlepaddle && mkdir /workspace

ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
RUN chmod +x /usr/bin/paddle_k8s

ADD *.whl /
RUN pip install /*.whl && rm -f /*.whl

ENV LD_LIBRARY_PATH=/usr/local/lib
ADD fluid_benchmark.py recordio_converter.py args.py run.sh run_fluid_benchmark.sh imagenet_reader.py /workspace/
ADD models/ /workspace/models/
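A hypothetical build-and-run sequence for this image (the image name is a placeholder, and `--gpus all` assumes Docker 19.03+; older setups used `nvidia-docker run` instead):

```bash
# Place a PaddlePaddle whl in the build context first, then:
docker build -t fluid-benchmark:latest .
docker run --gpus all -it fluid-benchmark:latest \
    python /workspace/fluid_benchmark.py --model mnist --device GPU
```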
@ -1,99 +0,0 @@
# Fluid Benchmark

This directory contains several model configurations and tools used to run
Fluid benchmarks for local and distributed training.

## Run the Benchmark

To start, run the following command to get the full help message:

```bash
python fluid_benchmark.py --help
```

Currently supported values for the `--model` argument include:

* mnist
* resnet
  * you can choose a different dataset using `--data_set cifar10` or
    `--data_set flowers`.
* vgg
* stacked_dynamic_lstm
* machine_translation

* Run the following command to start a benchmark job locally:
  ```bash
  python fluid_benchmark.py --model mnist --device GPU
  ```
  You can choose GPU or CPU training. With GPU training, you can specify
  `--gpus <gpu_num>` to run multi-GPU training. The parameter server can
  also run in async mode: specify `--async_mode` to train the model
  asynchronously.
* Run distributed training with parameter servers (a combined launcher
  sketch follows this list):
  * see [run_fluid_benchmark.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/fluid/run_fluid_benchmark.sh) as an example.
  * start parameter servers:
    ```bash
    PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver
    sleep 15
    ```
  * start trainers:
    ```bash
    PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver
    ```
* Run distributed training using NCCL2:
  ```bash
  PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method nccl2
  ```
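For a quick single-node smoke test, the pserver and trainer commands above can be combined into one launcher script. This is a minimal sketch using only the values from the examples; on a real cluster each role runs on its own node:

```bash
#!/bin/bash
# Shared settings, taken verbatim from the single-node example above.
export PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 \
       PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0

# Start the parameter server in the background and give it time to listen,
# then start the trainer in the foreground.
PADDLE_TRAINING_ROLE=PSERVER python fluid_benchmark.py \
    --model mnist --device GPU --update_method pserver &
sleep 15
PADDLE_TRAINING_ROLE=TRAINER python fluid_benchmark.py \
    --model mnist --device GPU --update_method pserver
```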

## Prepare the RecordIO File to Achieve Better Performance

Running the following command generates RecordIO files such as "mnist.recordio"
under the given path with the batch size you choose. You can use batch_size=1
so that a later reader can change the batch size at any time using `fluid.batch`.

```bash
python -c 'from recordio_converter import *; prepare_mnist("data", 1)'
```

## Run Distributed Benchmark on Kubernetes Cluster

You may need to build a Docker image before submitting a cluster job onto Kubernetes; otherwise you will
have to start all those processes manually on each node, which is not recommended.

To build the Docker image, you need to choose a PaddlePaddle "whl" package to run with. You may either
download it from
http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_en.html or
build it yourself. Once you've got the "whl" package, put it under the current directory and run:

```bash
docker build -t [your docker image name]:[your docker image tag] .
```

Then push the image to a Docker registry that your Kubernetes cluster can reach.

We provide a script `kube_gen_job.py` to generate Kubernetes yaml files to submit
distributed benchmark jobs to your cluster. To generate a job yaml, just run:

```bash
python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --gpus 8 --device GPU --update_method pserver " --disttype pserver
```

The yaml files are then generated under the directory `myjob`, and you can run:

```bash
kubectl create -f myjob/
```

The job should then start.
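Once submitted, progress can be watched with standard kubectl commands (the pod name pattern below is an assumption based on `--jobname myjob`; check `kubectl get pods` for the actual names):

```bash
kubectl get pods | grep myjob              # watch pserver/trainer pods come up
kubectl logs -f <myjob-trainer-pod-name>   # follow a trainer's output
```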

## Notes for Running Fluid Distributed with NCCL2 and RDMA

Before running NCCL2 distributed jobs, please check whether your node has multiple network
interfaces; if so, set the environment variable `export NCCL_SOCKET_IFNAME=eth0` to select your actual
network device.

To run high-performance distributed training, you must prepare your hardware environment to
support RDMA-enabled network communication; please check out [this](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/howto/cluster/nccl2_rdma_training.md)
note for details.
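A hedged example of the environment setup described above (`NCCL_SOCKET_IFNAME` is from this note; `NCCL_DEBUG=INFO` is a standard NCCL variable that logs which interface and transport NCCL selects, useful for verifying RDMA is actually in use):

```bash
export NCCL_SOCKET_IFNAME=eth0   # select the real network device on multi-NIC nodes
export NCCL_DEBUG=INFO           # optional: log NCCL's interface/transport choices
python fluid_benchmark.py --model mnist --device GPU --update_method nccl2
```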
@ -1,151 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

__all__ = ['parse_args', ]

BENCHMARK_MODELS = [
    "machine_translation", "resnet", "se_resnext", "vgg", "mnist",
    "stacked_dynamic_lstm", "resnet_with_preprocess"
]


def parse_args():
    parser = argparse.ArgumentParser('Fluid model benchmarks.')
    parser.add_argument(
        '--model',
        type=str,
        choices=BENCHMARK_MODELS,
        default='resnet',
        help='The model to run benchmark with.')
    parser.add_argument(
        '--batch_size', type=int, default=32, help='The minibatch size.')
    # args related to learning rate
    parser.add_argument(
        '--learning_rate', type=float, default=0.001, help='The learning rate.')
    # TODO(wuyi): add "--use_fake_data" option back.
    parser.add_argument(
        '--skip_batch_num',
        type=int,
        default=5,
        help='The number of initial minibatches to skip, for a more accurate performance test.'
    )
    parser.add_argument(
        '--iterations', type=int, default=80, help='The number of minibatches.')
    parser.add_argument(
        '--pass_num', type=int, default=100, help='The number of passes.')
    parser.add_argument(
        '--data_format',
        type=str,
        default='NCHW',
        choices=['NCHW', 'NHWC'],
        help='The data format; currently only NCHW is supported.')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    parser.add_argument(
        '--gpus',
        type=int,
        default=1,
        help='If gpus > 1, use ParallelExecutor to run; otherwise use Executor.')
    # this option is available only for vgg and resnet.
    parser.add_argument(
        '--cpus',
        type=int,
        default=1,
        help='If cpus > 1, set ParallelExecutor to use multiple threads.')
    parser.add_argument(
        '--data_set',
        type=str,
        default='flowers',
        choices=['cifar10', 'flowers', 'imagenet'],
        help='Optional dataset for benchmark.')
    parser.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')
    parser.add_argument(
        '--use_cprof', action='store_true', help='If set, use cProfile.')
    parser.add_argument(
        '--use_nvprof',
        action='store_true',
        help='If set, use nvprof for CUDA.')
    parser.add_argument(
        '--no_test',
        action='store_true',
        help='If set, do not evaluate on the test set during training.')
    parser.add_argument(
        '--memory_optimize',
        action='store_true',
        help='If set, optimize runtime memory before starting.')
    parser.add_argument(
        '--use_fake_data',
        action='store_true',
        help='If set, omit the actual data-reading operators.')
    parser.add_argument(
        '--profile', action='store_true', help='If set, profile a few steps.')
    parser.add_argument(
        '--update_method',
        type=str,
        default='local',
        choices=['local', 'pserver', 'nccl2'],
        help='Choose the parameter update method: local, pserver, or nccl2.')
    parser.add_argument(
        '--no_split_var',
        action='store_true',
        default=False,
        help='Whether to split variables into blocks when update_method is pserver.')
    parser.add_argument(
        '--async_mode',
        action='store_true',
        default=False,
        help='Whether to start the pserver in async mode to support ASGD.')
    parser.add_argument(
        '--use_reader_op',
        action='store_true',
        help='Whether to use the reader op; the data path must be specified if set.'
    )
    parser.add_argument(
        '--data_path',
        type=str,
        default="",
        help='Directory that contains all the training recordio files.')
    parser.add_argument(
        '--test_data_path',
        type=str,
        default="",
        help='Directory that contains all the test data (NOT recordio).')
    parser.add_argument(
        '--use_inference_transpiler',
        action='store_true',
        help='If set, use the inference transpiler to optimize the program.')
    parser.add_argument(
        '--no_random',
        action='store_true',
        help='If set, keep the random seed fixed and do not shuffle the data.')
    parser.add_argument(
        '--reduce_strategy',
        type=str,
        choices=['reduce', 'all_reduce'],
        default='all_reduce',
        help='Specify the reduce strategy: reduce or all_reduce.')
    parser.add_argument(
        '--fuse_broadcast_op',
        action='store_true',
        help='If set, fuse multiple broadcast operators into one fused_broadcast operator.'
    )
    args = parser.parse_args()
    return args
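For reference, a hypothetical invocation exercising several of the flags defined above:

```bash
python fluid_benchmark.py \
    --model resnet --data_set cifar10 \
    --batch_size 64 --pass_num 10 \
    --device GPU --gpus 2 \
    --update_method local --no_test
```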