Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into change_manylinux1_Docker

revert-12469-sum_op_dim_fix
minqiyang 7 years ago
commit ba84a6b7ed

@ -23,7 +23,7 @@ repos:
- id: clang-format-with-version-check - id: clang-format-with-version-check
name: clang-format name: clang-format
description: Format files with ClangFormat. description: Format files with ClangFormat.
entry: bash ./.clang_format.hook -i entry: bash ./tools/codestyle/clang_format.hook -i
language: system language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
- repo: local - repo: local
@ -52,7 +52,7 @@ repos:
hooks: hooks:
- id: copyright_checker - id: copyright_checker
name: copyright_checker name: copyright_checker
entry: python ./.copyright.hook entry: python ./tools/codestyle/copyright.hook
language: system language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$ exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$

@ -27,15 +27,6 @@ script:
# 43min timeout # 43min timeout
paddle/scripts/paddle_docker_build.sh ${JOB} paddle/scripts/paddle_docker_build.sh ${JOB}
if [ $? -eq 0 ] || [ $? -eq 142 ]; then true; else exit 1; fi; if [ $? -eq 0 ] || [ $? -eq 142 ]; then true; else exit 1; fi;
- |
if [[ "$JOB" != "doc" ]]; then exit 0; fi;
# For document only
if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then exit 0; fi;
export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh
export DOCS_DIR=`pwd`
cd ..
curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $DOCS_DIR $DOCS_DIR/build/doc/
notifications: notifications:
email: email:
on_success: change on_success: change

@ -22,6 +22,7 @@
| jczaja | Jacek Czaja | | jczaja | Jacek Czaja |
| JiayiFeng | Jia-Yi Feng | | JiayiFeng | Jia-Yi Feng |
| kbinias | Krzysztof Binias | | kbinias | Krzysztof Binias |
| kexinzhao | Ke-Xin Zhao |
| kuke | Yi-Bing Liu | | kuke | Yi-Bing Liu |
| lcy-seso | Ying Cao | | lcy-seso | Ying Cao |
| lipeng-unisound | Peng Li | | lipeng-unisound | Peng Li |
@ -45,6 +46,7 @@
| tianbingsz | Tian-Bing Xu | | tianbingsz | Tian-Bing Xu |
| tpatejko | Tomasz Patejko | | tpatejko | Tomasz Patejko |
| typhoonzero | Yi Wu | | typhoonzero | Yi Wu |
| velconia | Qi-Yang Min |
| wanghaoshuang | Hao-Shuang Wang | | wanghaoshuang | Hao-Shuang Wang |
| wangyang59 | Yang Wang | | wangyang59 | Yang Wang |
| wangzhen-nlp | Zhen Wang | | wangzhen-nlp | Zhen Wang |

@ -55,12 +55,25 @@ option(WITH_FLUID_ONLY "Compile PaddlePaddle fluid only" OFF)
option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
option(GLIDE_INSTALL "Download and install go dependencies " ON) option(GLIDE_INSTALL "Download and install go dependencies " ON)
option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF) option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF) option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF) option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
option(WITH_CONTRIB "Compile the third-party contributation" OFF) option(WITH_CONTRIB "Compile the third-party contributation" OFF)
option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
option(WITH_ANAKIN "Compile with Anakin library" OFF)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(WITH_INFERENCE "Compile fluid inference library" ON)
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
# PY_VERSION
# Python version to build against (for example 2.7 or 3.5). This is a version
# string rather than an ON/OFF switch, so it is declared as a STRING cache
# entry instead of through option(), which only models boolean values.
# A value passed with -DPY_VERSION=<ver> is kept, because set(... CACHE ...)
# without FORCE never overwrites an existing cache entry.
set(PY_VERSION "2.7" CACHE STRING "Python version to compile PaddlePaddle with, e.g. 2.7 or 3.5")
# Fall back to 2.7 when the cached value is empty or false, e.g. a BOOL entry
# created by the earlier option() declaration in an existing build directory.
if(NOT PY_VERSION)
  set(PY_VERSION 2.7)
endif()
# Forward the chosen version to pybind11 (included later via external/pybind11).
set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
# CMAKE_BUILD_TYPE # CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)
@ -98,6 +111,11 @@ if(ANDROID OR IOS)
add_definitions(-DPADDLE_MOBILE_INFERENCE) add_definitions(-DPADDLE_MOBILE_INFERENCE)
endif() endif()
if (APPLE OR WIN32)
set(WITH_MKL OFF CACHE STRING
"Disable MKL for building on mac and windows" FORCE)
endif()
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.") "A path setting third party libraries download & build directories.")
@ -120,6 +138,12 @@ else()
set(THIRD_PARTY_BUILD_TYPE Release) set(THIRD_PARTY_BUILD_TYPE Release)
endif() endif()
if(WITH_MKL)
option(MKL_SPLIT_GEMM "PaddlePaddle MKL gemm would split to small ones" OFF)
if (MKL_SPLIT_GEMM)
add_definitions(-DPADDLE_MKL_SPLIT_GEMM)
endif()
endif()
set(WITH_MKLML ${WITH_MKL}) set(WITH_MKLML ${WITH_MKL})
if (NOT DEFINED WITH_MKLDNN) if (NOT DEFINED WITH_MKLDNN)
if (WITH_MKL AND AVX2_FOUND) if (WITH_MKL AND AVX2_FOUND)
@ -129,9 +153,15 @@ if (NOT DEFINED WITH_MKLDNN)
set(WITH_MKLDNN OFF) set(WITH_MKLDNN OFF)
endif() endif()
endif() endif()
if (REPLACE_ENFORCE_GLOG)
add_definitions("-DREPLACE_ENFORCE_GLOG")
endif()
######################################################################################## ########################################################################################
include(external/mklml) # download mklml package include(external/mklml) # download mklml package
include(external/xbyak) # download xbyak package
include(external/libxsmm) # download, build, install libxsmm
include(external/zlib) # download, build, install zlib include(external/zlib) # download, build, install zlib
include(external/gflags) # download, build, install gflags include(external/gflags) # download, build, install gflags
include(external/glog) # download, build, install glog include(external/glog) # download, build, install glog
@ -147,11 +177,40 @@ include(external/any) # download libn::any
include(external/eigen) # download eigen3 include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11 include(external/pybind11) # download pybind11
include(external/cares) include(external/cares)
include(external/cub)
if(WITH_DISTRIBUTE)
if(WITH_GRPC)
include(external/grpc) include(external/grpc)
message(STATUS "Use grpc framework.")
else()
message(STATUS "Use brpc framework.")
include(external/leveldb)
include(external/brpc)
endif()
endif()
if(WITH_BRPC_RDMA)
message(STATUS "Use brpc with rdma.")
if(WITH_GRPC)
message(FATAL_ERROR "Can't use grpc with brpc rdma.")
endif()
if(NOT WITH_DISTRIBUTE)
message(FATAL_ERROR "Can't use brpc rdma in no distribute env.")
endif()
endif()
include(external/snappy) # download snappy include(external/snappy) # download snappy
include(external/snappystream) include(external/snappystream)
include(external/threadpool) include(external/threadpool)
set(WITH_ANAKIN OFF CACHE STRING "Disable Anakin first, will add it later." FORCE)
if(WITH_GPU)
include(cuda)
include(tensorrt)
include(external/anakin)
endif()
include(cudnn) # set cudnn libraries, must before configure include(cudnn) # set cudnn libraries, must before configure
include(cupti) include(cupti)
include(configure) # add paddle env configuration include(configure) # add paddle env configuration
@ -167,7 +226,7 @@ include(inference_lib) # add paddle fluid inference libraries
include_directories("${PADDLE_SOURCE_DIR}") include_directories("${PADDLE_SOURCE_DIR}")
include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include") include_directories("${PADDLE_SOURCE_DIR}/paddle/legacy/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c") include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c")
@ -180,11 +239,6 @@ set(EXTERNAL_LIBS
${PYTHON_LIBRARIES} ${PYTHON_LIBRARIES}
) )
if(WITH_GPU)
include(cuda)
include(tensorrt)
endif(WITH_GPU)
if(WITH_AMD_GPU) if(WITH_AMD_GPU)
find_package(HIP) find_package(HIP)
include(hip) include(hip)
@ -194,6 +248,10 @@ if(WITH_MKLML)
list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB}) list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
endif() endif()
if(WITH_LIBXSMM)
list(APPEND EXTERNAL_LIBS ${LIBXSMM_LIBS})
endif()
if(WITH_MKLDNN) if(WITH_MKLDNN)
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB}) list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
endif() endif()
@ -208,7 +266,7 @@ add_subdirectory(proto)
if(NOT MOBILE_INFERENCE AND NOT WITH_FLUID_ONLY) if(NOT MOBILE_INFERENCE AND NOT WITH_FLUID_ONLY)
# "add_subdirectory(go)" should be placed after the following line, # "add_subdirectory(go)" should be placed after the following line,
# because it depends on paddle/optimizer. # because it depends on paddle/optimizer.
add_subdirectory(paddle/optimizer) add_subdirectory(paddle/legacy/optimizer)
endif() endif()
# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
@ -233,7 +291,3 @@ if(WITH_DOC)
find_python_module(recommonmark REQUIRED) find_python_module(recommonmark REQUIRED)
add_subdirectory(doc) add_subdirectory(doc)
endif() endif()
if (WITH_CONTRIB)
add_subdirectory(paddle/contrib)
endif()

@ -159,4 +159,4 @@ This will enable VLOG messages generated by `buddy_allocator.{h,cc}` and in the
- verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework) - verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework)
- verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators) - verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)
- verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform) - verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform)
- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/math) - verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/math)

@ -23,8 +23,8 @@ ENV HOME /root
COPY ./paddle/scripts/docker/root/ /root/ COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --allow-downgrades \ apt-get install -y --allow-downgrades patchelf \
git python-pip python-dev openssh-server bison \ git python-pip python-dev python-opencv openssh-server bison \
libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \ libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \ curl sed grep graphviz libjpeg-dev zlib1g-dev \
@ -80,7 +80,7 @@ RUN pip install pre-commit 'ipython==5.3.0' && \
pip install opencv-python pip install opencv-python
#For docstring checker #For docstring checker
RUN pip install pylint pytest astroid isort RUN pip install pylint pytest astroid isort LinkChecker
COPY ./python/requirements.txt /root/ COPY ./python/requirements.txt /root/
RUN pip install -r /root/requirements.txt RUN pip install -r /root/requirements.txt

@ -4,7 +4,6 @@
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html) [![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html) [![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html)
[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
@ -19,6 +18,22 @@ learning to many products at Baidu.
Our vision is to enable deep learning for everyone via PaddlePaddle. Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle. Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.
### Latest PaddlePaddle Release: [Fluid 0.14.0](https://github.com/PaddlePaddle/Paddle/tree/v0.14.0)
### Install Latest Stable Release:
```
# Linux CPU
pip install paddlepaddle
# Linux GPU cuda9cudnn7
pip install paddlepaddle-gpu
# Linux GPU cuda8cudnn7
pip install paddlepaddle-gpu==0.14.0.post87
# Linux GPU cuda8cudnn5
pip install paddlepaddle-gpu==0.14.0.post85
# For installation on other platform, refer to http://paddlepaddle.org/
```
## Features ## Features
- **Flexibility** - **Flexibility**

@ -7,3 +7,6 @@ paddle/rnn/imdb.pkl
caffe/image/logs caffe/image/logs
tensorflow/image/logs tensorflow/image/logs
tensorflow/rnn/logs tensorflow/rnn/logs
fluid/models/*.pyc
fluid/logs
fluid/nohup.out

@ -0,0 +1,31 @@
# Image used to run the fluid benchmark scripts (see the files added to
# /workspace at the end of this file).
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04

# Setting UBUNTU_MIRROR can speed up apt-get.
# ARG UBUNTU_MIRROR
# RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'

RUN apt-get update && apt-get install -y python python-pip iputils-ping libgtk2.0-dev wget vim net-tools iftop python-opencv
# Expose cudnn and nccl under unversioned names in /usr/lib.
RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so && ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/libnccl.so

# IMPORTANT:
# Add "ENV http_proxy=http://ip:port" if your download is slow, and don't forget to unset it at runtime.
# example: unset http_proxy && unset https_proxy && python fluid_benchmark.py ...

RUN pip install -U pip
RUN pip install -U kubernetes paddlepaddle

# Call the paddle.dataset loaders at build time (presumably so the datasets are
# already cached inside the image -- confirm against paddle.dataset), then
# remove the released paddlepaddle package; the local wheel is installed below.
RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()\npaddle.dataset.flowers.fetch()" | python'
RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.mnist.train()\npaddle.dataset.mnist.test()\npaddle.dataset.imdb.fetch()" | python'
RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.imikolov.fetch()" | python'
RUN pip uninstall -y paddlepaddle && mkdir /workspace

ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
RUN chmod +x /usr/bin/paddle_k8s

# Install the paddle wheel(s) placed next to this Dockerfile.
ADD *.whl /
RUN pip install /*.whl && rm -f /*.whl

ENV LD_LIBRARY_PATH=/usr/local/lib
# Benchmark entry points and helpers; each source file is listed exactly once.
ADD fluid_benchmark.py recordio_converter.py args.py run.sh run_fluid_benchmark.sh /workspace/
ADD models/ /workspace/models/

@ -28,10 +28,14 @@ Currently supported `--model` argument include:
``` ```
You can choose to use GPU/CPU training. With GPU training, you can specify You can choose to use GPU/CPU training. With GPU training, you can specify
`--gpus <gpu_num>` to run multi GPU training. `--gpus <gpu_num>` to run multi GPU training.
You can also run the parameter servers in async mode: specify
`--async_mode` to train the model asynchronously.
* Run distributed training with parameter servers: * Run distributed training with parameter servers:
* see [run_fluid_benchmark.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/fluid/run_fluid_benchmark.sh) as an example.
* start parameter servers: * start parameter servers:
```bash ```bash
PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver
sleep 15
``` ```
* start trainers: * start trainers:
```bash ```bash
@ -42,13 +46,37 @@ Currently supported `--model` argument include:
PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method nccl2 PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method nccl2
``` ```
## Prepare the RecordIO file to Achieve Better Performance
Running the following command will generate RecordIO files like "mnist.recordio" under the path
and with the batch_size you choose. You can use batch_size=1 so that a later reader can change the
batch_size at any time using `fluid.batch`.
```bash
python -c 'from recordio_converter import *; prepare_mnist("data", 1)'
```
## Run Distributed Benchmark on Kubernetes Cluster ## Run Distributed Benchmark on Kubernetes Cluster
You may need to build a Docker image before submitting a cluster job onto Kubernetes; otherwise you will
have to start all those processes manually on each node, which is not recommended.
To build the Docker image, you need to choose a paddle "whl" package to run with. You may either
download it from
http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_en.html or
build it yourself. Once you've got the "whl" package, put it under the current directory and run:
```bash
docker build -t [your docker image name]:[your docker image tag] .
```
Then push the image to a Docker registry that your Kubernetes cluster can reach.
We provide a script `kube_gen_job.py` to generate Kubernetes yaml files to submit We provide a script `kube_gen_job.py` to generate Kubernetes yaml files to submit
distributed benchmark jobs to your cluster. To generate a job yaml, just run: distributed benchmark jobs to your cluster. To generate a job yaml, just run:
```bash ```bash
python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver " --disttype pserver python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --gpus 8 --device GPU --update_method pserver " --disttype pserver
``` ```
Then the yaml files are generated under directory `myjob`, you can run: Then the yaml files are generated under directory `myjob`, you can run:

@ -0,0 +1,134 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
__all__ = ['parse_args', ]
# Names accepted by the --model command-line option.
BENCHMARK_MODELS = [
    "machine_translation", "resnet", "vgg", "mnist", "stacked_dynamic_lstm"
]


def parse_args():
    """Parse the command-line options of the fluid benchmark scripts.

    Options are read from ``sys.argv`` through ``argparse``.

    Returns:
        argparse.Namespace: one attribute per option declared below
        (``model``, ``batch_size``, ``gpus``, ``data_set``, ...), holding
        either the value given on the command line or the declared default.

    Raises:
        SystemExit: raised by argparse on an unknown option or a value that
        is not among an option's ``choices``.
    """
    parser = argparse.ArgumentParser('Fluid model benchmarks.')
    parser.add_argument(
        '--model',
        type=str,
        choices=BENCHMARK_MODELS,
        default='resnet',
        help='The model to run benchmark with.')
    parser.add_argument(
        '--batch_size', type=int, default=32, help='The minibatch size.')
    # args related to learning rate
    parser.add_argument(
        '--learning_rate', type=float, default=0.001, help='The learning rate.')
    # TODO(wuyi): add "--use_fake_data" option back.
    parser.add_argument(
        '--skip_batch_num',
        type=int,
        default=5,
        help='The first num of minibatch num to skip, for better performance test'
    )
    parser.add_argument(
        '--iterations', type=int, default=80, help='The number of minibatches.')
    parser.add_argument(
        '--pass_num', type=int, default=100, help='The number of passes.')
    parser.add_argument(
        '--data_format',
        type=str,
        default='NCHW',
        choices=['NCHW', 'NHWC'],
        help='The data data_format, now only support NCHW.')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    parser.add_argument(
        '--gpus',
        type=int,
        default=1,
        help='If gpus > 1, will use ParallelExecutor to run, else use Executor.')
    # Read by the models that build a ParallelDo network when running on CPU.
    parser.add_argument(
        '--cpus',
        type=int,
        default=1,
        help='If cpus > 1, will use ParallelDo to run, else use Executor.')
    # 'imagenet' is accepted because the resnet model has a dedicated
    # data_set == "imagenet" branch (which additionally requires --data_path).
    parser.add_argument(
        '--data_set',
        type=str,
        default='flowers',
        choices=['cifar10', 'flowers', 'imagenet'],
        help='Optional dataset for benchmark.')
    parser.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')
    parser.add_argument(
        '--use_cprof', action='store_true', help='If set, use cProfile.')
    parser.add_argument(
        '--use_nvprof',
        action='store_true',
        help='If set, use nvprof for CUDA.')
    parser.add_argument(
        '--no_test',
        action='store_true',
        help='If set, do not test the testset during training.')
    parser.add_argument(
        '--memory_optimize',
        action='store_true',
        help='If set, optimize runtime memory before start.')
    parser.add_argument(
        '--use_fake_data',
        action='store_true',
        help='If set ommit the actual read data operators.')
    parser.add_argument(
        '--profile', action='store_true', help='If set, profile a few steps.')
    parser.add_argument(
        '--update_method',
        type=str,
        default='local',
        choices=['local', 'pserver', 'nccl2'],
        help='Choose parameter update method, can be local, pserver, nccl2.')
    parser.add_argument(
        '--no_split_var',
        action='store_true',
        default=False,
        help='Whether split variables into blocks when update_method is pserver')
    parser.add_argument(
        '--async_mode',
        action='store_true',
        default=False,
        help='Whether start pserver in async mode to support ASGD')
    parser.add_argument(
        '--use_reader_op',
        action='store_true',
        help='Whether to use reader op, and must specify the data path if set this to true.'
    )
    parser.add_argument(
        '--data_path',
        type=str,
        default="",
        help='Directory that contains all the training recordio files.')
    parser.add_argument(
        '--use_inference_transpiler',
        action='store_true',
        help='If set, use inference transpiler to optimize the program.')
    parser.add_argument(
        '--no_random',
        action='store_true',
        help='If set, keep the random seed and do not shuffle the data.')
    args = parser.parse_args()
    return args

File diff suppressed because it is too large Load Diff

@ -17,6 +17,7 @@ import copy
import argparse import argparse
import random import random
import os import os
import copy
from kube_templates import pserver, trainer, envs from kube_templates import pserver, trainer, envs
@ -108,10 +109,9 @@ def gen_job():
tn_container["ports"][0]["containerPort"] = spreadport tn_container["ports"][0]["containerPort"] = spreadport
envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname}) envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname})
envs.append({"name": "TRAINERS", "value": str(args.trainers)}) envs.append({"name": "PADDLE_TRAINERS", "value": str(args.trainers)})
envs.append({"name": "PSERVERS", "value": str(args.pservers)}) envs.append({"name": "PADDLE_PSERVERS", "value": str(args.pservers)})
envs.append({"name": "ENTRY", "value": args.entry}) envs.append({"name": "ENTRY", "value": args.entry})
envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)})
envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)}) envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
# NOTE: these directories below are cluster specific, please modify # NOTE: these directories below are cluster specific, please modify
# this settings before you run on your own cluster. # this settings before you run on your own cluster.
@ -166,17 +166,23 @@ def gen_job():
tn["spec"]["template"]["spec"]["volumes"] = volumes tn["spec"]["template"]["spec"]["volumes"] = volumes
tn_container["volumeMounts"] = volumeMounts tn_container["volumeMounts"] = volumeMounts
ps_container["env"] = envs ps_container["env"] = copy.deepcopy(envs)
ps_container["env"].append({"name": "TRAINING_ROLE", "value": "PSERVER"}) ps_container["env"].append({
"name": "PADDLE_TRAINING_ROLE",
"value": "PSERVER"
})
tn_container["env"] = envs tn_container["env"] = envs
if args.disttype == "pserver": if args.disttype == "pserver":
tn_container["env"].append({ tn_container["env"].append({
"name": "TRAINING_ROLE", "name": "PADDLE_TRAINING_ROLE",
"value": "TRAINER" "value": "TRAINER"
}) })
elif args.disttype == "nccl2" or args.disttype == "local": elif args.disttype == "nccl2" or args.disttype == "local":
# NCCL2 have no training role, set to plain WORKER # NCCL2 have no training role, set to plain WORKER
tn_container["env"].append({"name": "TRAINING_ROLE", "value": "WORKER"}) tn_container["env"].append({
"name": "PADDLE_TRAINING_ROLE",
"value": "WORKER"
})
os.mkdir(args.jobname) os.mkdir(args.jobname)
if args.disttype == "pserver": if args.disttype == "pserver":

@ -173,21 +173,6 @@ def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
return avg_cost, feeding_list return avg_cost, feeding_list
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
lod_t = core.LoDTensor()
lod_t.set(flattened_data, place)
lod_t.set_lod([lod])
return lod_t, lod[-1]
def lodtensor_to_ndarray(lod_tensor): def lodtensor_to_ndarray(lod_tensor):
dims = lod_tensor.get_dims() dims = lod_tensor.get_dims()
ndarray = np.zeros(shape=dims).astype('float32') ndarray = np.zeros(shape=dims).astype('float32')
@ -197,6 +182,8 @@ def lodtensor_to_ndarray(lod_tensor):
def get_model(args): def get_model(args):
if args.use_reader_op:
raise Exception("machine_translation do not support reader op for now.")
embedding_dim = 512 embedding_dim = 512
encoder_size = 512 encoder_size = 512
decoder_size = 512 decoder_size = 512
@ -221,7 +208,7 @@ def get_model(args):
train_batch_generator = paddle.batch( train_batch_generator = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=1000), paddle.dataset.wmt14.train(dict_size), buf_size=1000),
batch_size=args.batch_size) batch_size=args.batch_size * args.gpus)
test_batch_generator = paddle.batch( test_batch_generator = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(

@ -20,6 +20,7 @@ import numpy as np
import argparse import argparse
import time import time
import cProfile import cProfile
import os
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
@ -65,19 +66,49 @@ def cnn_model(data):
def get_model(args): def get_model(args):
# Input data if args.use_reader_op:
filelist = [
os.path.join(args.data_path, f) for f in os.listdir(args.data_path)
]
data_file = fluid.layers.open_files(
filenames=filelist,
shapes=[[-1, 1, 28, 28], (-1, 1)],
lod_levels=[0, 0],
dtypes=["float32", "int64"],
thread_num=args.gpus,
pass_num=args.pass_num)
data_file = fluid.layers.double_buffer(
fluid.layers.batch(
data_file, batch_size=args.batch_size))
images, label = fluid.layers.read_file(data_file)
else:
images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
if args.device == 'CPU' and args.cpus > 1:
places = fluid.layers.get_places(args.cpus)
pd = fluid.layers.ParallelDo(places)
with pd.do():
predict = cnn_model(pd.read_input(images))
label = pd.read_input(label)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
batch_acc = fluid.layers.accuracy(input=predict, label=label)
pd.write_output(avg_cost)
pd.write_output(batch_acc)
avg_cost, batch_acc = pd()
avg_cost = fluid.layers.mean(avg_cost)
batch_acc = fluid.layers.mean(batch_acc)
else:
# Train program # Train program
predict = cnn_model(images) predict = cnn_model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label) cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(x=cost)
# Evaluator # Evaluator
batch_size_tensor = fluid.layers.create_tensor(dtype='int64') batch_acc = fluid.layers.accuracy(input=predict, label=label)
batch_acc = fluid.layers.accuracy(
input=predict, label=label, total=batch_size_tensor)
# inference program # inference program
inference_program = fluid.default_main_program().clone() inference_program = fluid.default_main_program().clone()
@ -88,7 +119,7 @@ def get_model(args):
# Reader # Reader
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=args.batch_size) paddle.dataset.mnist.train(), batch_size=args.batch_size * args.gpus)
test_reader = paddle.batch( test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=args.batch_size) paddle.dataset.mnist.test(), batch_size=args.batch_size)
return avg_cost, inference_program, opt, train_reader, test_reader, batch_acc return avg_cost, inference_program, opt, train_reader, test_reader, batch_acc

@ -19,6 +19,7 @@ from __future__ import print_function
import functools import functools
import numpy as np import numpy as np
import time import time
import os
import cProfile, pstats, StringIO import cProfile, pstats, StringIO
@ -26,6 +27,7 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
from recordio_converter import imagenet_train, imagenet_test
def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'): def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
@ -122,40 +124,85 @@ def get_model(args):
else: else:
dshape = [32, 32, 3] dshape = [32, 32, 3]
model = resnet_cifar10 model = resnet_cifar10
else: train_reader = paddle.dataset.cifar.train10()
test_reader = paddle.dataset.cifar.test10()
elif args.data_set == "flowers":
class_dim = 102 class_dim = 102
if args.data_format == 'NCHW': if args.data_format == 'NCHW':
dshape = [3, 224, 224] dshape = [3, 224, 224]
else: else:
dshape = [224, 224, 3] dshape = [224, 224, 3]
model = resnet_imagenet model = resnet_imagenet
train_reader = paddle.dataset.flowers.train()
test_reader = paddle.dataset.flowers.test()
elif args.data_set == "imagenet":
class_dim = 1000
if args.data_format == 'NCHW':
dshape = [3, 224, 224]
else:
dshape = [224, 224, 3]
model = resnet_imagenet
if not args.data_path:
raise Exception(
"Must specify --data_path when training with imagenet")
train_reader = imagenet_train(args.data_path)
test_reader = imagenet_test(args.data_path)
if args.use_reader_op:
filelist = [
os.path.join(args.data_path, f) for f in os.listdir(args.data_path)
]
data_file = fluid.layers.open_files(
filenames=filelist,
shapes=[[-1] + dshape, (-1, 1)],
lod_levels=[0, 0],
dtypes=["float32", "int64"],
thread_num=args.gpus,
pass_num=args.pass_num)
data_file = fluid.layers.double_buffer(
fluid.layers.batch(
data_file, batch_size=args.batch_size))
input, label = fluid.layers.read_file(data_file)
else:
input = fluid.layers.data(name='data', shape=dshape, dtype='float32') input = fluid.layers.data(name='data', shape=dshape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
predict = model(input, class_dim)
if args.device == 'CPU' and args.cpus > 1:
places = fluid.layers.get_places(args.cpus)
pd = fluid.layers.ParallelDo(places)
with pd.do():
predict = model(pd.read_input(input), class_dim)
label = pd.read_input(label)
cost = fluid.layers.cross_entropy(input=predict, label=label) cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(x=cost)
batch_acc = fluid.layers.accuracy(input=predict, label=label)
batch_size_tensor = fluid.layers.create_tensor(dtype='int64') pd.write_output(avg_cost)
batch_acc = fluid.layers.accuracy( pd.write_output(batch_acc)
input=predict, label=label, total=batch_size_tensor)
avg_cost, batch_acc = pd()
avg_cost = fluid.layers.mean(avg_cost)
batch_acc = fluid.layers.mean(batch_acc)
else:
predict = model(input, class_dim)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
batch_acc = fluid.layers.accuracy(input=predict, label=label)
inference_program = fluid.default_main_program().clone() inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program): with fluid.program_guard(inference_program):
inference_program = fluid.io.get_inference_program( inference_program = fluid.io.get_inference_program(
target_vars=[batch_acc, batch_size_tensor]) target_vars=[batch_acc])
optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
train_reader = paddle.batch( batched_train_reader = paddle.batch(
paddle.reader.shuffle( train_reader if args.no_random else paddle.reader.shuffle(
paddle.dataset.cifar.train10() train_reader, buf_size=5120),
if args.data_set == 'cifar10' else paddle.dataset.flowers.train(), batch_size=args.batch_size * args.gpus,
buf_size=5120), drop_last=True)
batch_size=args.batch_size) batched_test_reader = paddle.batch(
test_reader = paddle.batch( test_reader, batch_size=args.batch_size, drop_last=True)
paddle.dataset.cifar.test10()
if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), return avg_cost, inference_program, optimizer, batched_train_reader,\
batch_size=args.batch_size) batched_test_reader, batch_acc
return avg_cost, inference_program, optimizer, train_reader, test_reader, batch_acc

@ -44,6 +44,9 @@ def crop_sentence(reader, crop_size):
def get_model(args): def get_model(args):
if args.use_reader_op:
raise Exception(
"stacked_dynamic_lstm do not support reader op for now.")
lstm_size = 512 lstm_size = 512
emb_dim = 512 emb_dim = 512
crop_size = 1500 crop_size = 1500
@ -115,25 +118,10 @@ def get_model(args):
train_reader = batch( train_reader = batch(
paddle.reader.shuffle( paddle.reader.shuffle(
crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000), crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000),
batch_size=args.batch_size) batch_size=args.batch_size * args.gpus)
test_reader = batch( test_reader = batch(
paddle.reader.shuffle( paddle.reader.shuffle(
crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000), crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000),
batch_size=args.batch_size) batch_size=args.batch_size)
return loss, inference_program, adam, train_reader, test_reader, batch_acc return loss, inference_program, adam, train_reader, test_reader, batch_acc
def to_lodtensor(data, place):
    """Pack a batch of variable-length sequences into one ``fluid.LoDTensor``.

    Args:
        data: iterable of sequences; each must support ``len()`` and be
            accepted by ``numpy.concatenate``.
        place: device place handed to ``LoDTensor.set``.

    Returns:
        A ``fluid.LoDTensor`` holding all sequences concatenated along axis 0
        as an ``int64`` column of shape ``[total_len, 1]``, with a single LoD
        level made of the cumulative sequence offsets.
    """
    # Offsets start at 0 and accumulate each sequence length in order.
    offsets = [0]
    for seq in data:
        offsets.append(offsets[-1] + len(seq))

    stacked = numpy.concatenate(data, axis=0).astype("int64")
    column = stacked.reshape([len(stacked), 1])

    tensor = fluid.LoDTensor()
    tensor.set(column, place)
    tensor.set_lod([offsets])
    return tensor

@ -22,6 +22,7 @@ import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
import argparse import argparse
import functools import functools
import os
def vgg16_bn_drop(input): def vgg16_bn_drop(input):
@ -65,8 +66,24 @@ def get_model(args):
else: else:
data_shape = [224, 224, 3] data_shape = [224, 224, 3]
# Input data if args.use_reader_op:
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') filelist = [
os.path.join(args.data_path, f) for f in os.listdir(args.data_path)
]
data_file = fluid.layers.open_files(
filenames=filelist,
shapes=[[-1] + data_shape, (-1, 1)],
lod_levels=[0, 0],
dtypes=["float32", "int64"],
thread_num=args.gpus,
pass_num=args.pass_num)
data_file = fluid.layers.double_buffer(
fluid.layers.batch(
data_file, batch_size=args.batch_size))
images, label = fluid.layers.read_file(data_file)
else:
images = fluid.layers.data(
name='data', shape=data_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# Train program # Train program
@ -95,7 +112,7 @@ def get_model(args):
paddle.dataset.cifar.train10() paddle.dataset.cifar.train10()
if args.data_set == 'cifar10' else paddle.dataset.flowers.train(), if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
buf_size=5120), buf_size=5120),
batch_size=args.batch_size) batch_size=args.batch_size * args.gpus)
test_reader = paddle.batch( test_reader = paddle.batch(
paddle.dataset.cifar.test10() paddle.dataset.cifar.test10()
if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),

@ -0,0 +1,164 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.dataset import mnist, cifar, flowers, image
def convert_2_recordio(py_reader, outfilepath, batch_size, shape_data,
                       shape_label):
    """Batch a sample reader and dump it into a single RecordIO file.

    Args:
        py_reader: zero-argument callable returning a sample reader; it is
            called once and the result is wrapped with ``paddle.batch``.
        outfilepath: path of the RecordIO file to write.
        batch_size: number of samples per batch.
        shape_data: shape of the ``image`` feed variable.
        shape_label: shape of the ``label`` feed variable (dtype ``int64``).

    Returns:
        Whatever ``fluid.recordio_writer.convert_reader_to_recordio_file``
        returns (presumably the number of batches written -- confirm against
        the Paddle API).
    """
    # The feed variables are declared under fresh Program objects rather than
    # the default main/startup programs.
    scratch_main = fluid.Program()
    scratch_startup = fluid.Program()
    with fluid.program_guard(scratch_main, scratch_startup):
        batched_reader = paddle.batch(py_reader(), batch_size=batch_size)
        # order is image and label
        image_var = fluid.layers.data(name='image', shape=shape_data)
        label_var = fluid.layers.data(
            name='label', shape=shape_label, dtype='int64')
        feeder = fluid.DataFeeder(
            feed_list=[image_var, label_var], place=fluid.CPUPlace())
        return fluid.recordio_writer.convert_reader_to_recordio_file(
            outfilepath, batched_reader, feeder)
def prepare_mnist(outpath, batch_size):
    """Write the MNIST training set to ``<outpath>/mnist.recordio``.

    Samples are fed as a flat 784-element image plus a single label.
    """
    convert_2_recordio(
        py_reader=mnist.train,
        outfilepath=os.path.join(outpath, "mnist.recordio"),
        batch_size=batch_size,
        shape_data=[784],
        shape_label=[1])
def prepare_cifar10(outpath, batch_size):
    """Write the CIFAR-10 training set to ``<outpath>/cifar.recordio``.

    Samples are fed as a ``[3, 32, 32]`` image plus a single label.
    """
    convert_2_recordio(
        py_reader=cifar.train10,
        outfilepath=os.path.join(outpath, "cifar.recordio"),
        batch_size=batch_size,
        shape_data=[3, 32, 32],
        shape_label=[1])
def prepare_flowers(outpath, batch_size):
    """Write the Flowers training set to ``<outpath>/flowers.recordio``.

    Samples are fed as a ``[3, 224, 224]`` image plus a single label.
    """
    convert_2_recordio(
        py_reader=flowers.train,
        outfilepath=os.path.join(outpath, "flowers.recordio"),
        batch_size=batch_size,
        shape_data=[3, 224, 224],
        shape_label=[1])
def default_mapper(sample):
    """Turn one ``(image, label)`` sample into ``(float32 vector, label)``.

    The image goes through ``image.simple_transform`` with the positional
    arguments ``256, 224, True`` and a per-channel mean, and is then flattened
    and cast to ``float32``. The label is passed through unchanged.
    """
    raw_image, label = sample
    # NOTE(review): the positional args look like resize=256, crop=224,
    # is_train=True -- confirm against paddle.dataset.image.simple_transform.
    # If so, the train-mode flag is also applied when this mapper is used for
    # validation data.
    transformed = image.simple_transform(
        raw_image, 256, 224, True, mean=[103.94, 116.78, 123.68])
    flat = transformed.flatten().astype('float32')
    return flat, label
# Exact set of entries the ImageNet data directory must contain.
_IMAGENET_DIR_ENTRIES = frozenset(
    ["train", "train.txt", "val", "val_set", "val.txt", "unzip.sh"])


def _check_imagenet_dir(data_dir):
    """Raise ``Exception`` unless ``data_dir`` holds exactly the expected entries."""
    if set(os.listdir(data_dir)) != _IMAGENET_DIR_ENTRIES:
        raise Exception("Imagenet data contents error!")


def _read_label_file(list_path):
    """Parse a ``<image> <label>`` list file.

    Blank lines are skipped. Each remaining line is split on its last run of
    whitespace, so the final line is parsed correctly whether or not the file
    ends with a newline.

    Returns:
        ``(img2label, imgfilelist)``: a dict mapping image name to its integer
        label, and the image names in file order.

    Raises:
        ValueError: if a non-blank line lacks a label or the label is not an
            integer.
    """
    img2label = dict()
    imgfilelist = []
    with open(list_path) as fn:
        for line in fn:
            line = line.strip()
            if not line:
                continue
            img, lbl = line.rsplit(None, 1)
            img2label[img] = int(lbl)
            imgfilelist.append(img)
    return img2label, imgfilelist


def imagenet_train(data_dir):
    """Build a reader creator over the ImageNet training images.

    Args:
        data_dir: directory that must contain exactly ``train``,
            ``train.txt``, ``val``, ``val_set``, ``val.txt`` and ``unzip.sh``.

    Returns:
        The result of ``paddle.reader.map_readers(default_mapper, reader)``,
        where ``reader`` yields ``[image, [label]]`` for every entry of
        ``train.txt``.

    Raises:
        Exception: if the directory contents differ from the expected set.
        ValueError: if ``train.txt`` contains a malformed line.
    """
    _check_imagenet_dir(data_dir)
    img2label, imgfilelist = _read_label_file(
        os.path.join(data_dir, "train.txt"))
    # shuffle all, this is slow
    # The shuffle happens once, here, so every pass over the returned reader
    # visits the images in the same (shuffled) order.
    random.shuffle(imgfilelist)

    def train_reader():
        for imgfile in imgfilelist:
            # The on-disk name is the lower-cased list entry; the label lookup
            # still uses the entry exactly as written in train.txt.
            data = image.load_image(
                os.path.join(data_dir, "train", imgfile.lower()))
            label = [img2label[imgfile], ]
            yield [data, label]

    return paddle.reader.map_readers(default_mapper, train_reader)


def imagenet_test(data_dir):
    """Build a reader creator over the ImageNet validation images.

    Args:
        data_dir: directory that must contain exactly ``train``,
            ``train.txt``, ``val``, ``val_set``, ``val.txt`` and ``unzip.sh``.

    Returns:
        The result of ``paddle.reader.map_readers(default_mapper, reader)``,
        where ``reader`` yields ``[image, [label]]`` for every entry of
        ``val.txt``, in file order.

    Raises:
        Exception: if the directory contents differ from the expected set.
        ValueError: if ``val.txt`` contains a malformed line.
    """
    _check_imagenet_dir(data_dir)
    img2label, imgfilelist = _read_label_file(
        os.path.join(data_dir, "val.txt"))

    def test_reader():
        for imgfile in imgfilelist:
            # Everything after the first "." in the list entry is replaced by
            # a "jpeg" extension to form the path under val/.
            base_path = os.path.join(data_dir, "val", imgfile.split(".")[0])
            image_path = ".".join([base_path, "jpeg"])
            data = image.load_image(image_path)
            label = [img2label[imgfile], ]
            yield [data, label]

    return paddle.reader.map_readers(default_mapper, test_reader)
# FIXME(wuyi): delete this when https://github.com/PaddlePaddle/Paddle/pull/11066 is merged
def convert_reader_to_recordio_files(
        filename,
        batch_per_file,
        reader_creator,
        feeder,
        compressor=core.RecordIOWriter.Compressor.Snappy,
        max_num_records=1000,
        feed_order=None):
    """Write the batches of a reader into a series of RecordIO files.

    Output files are named ``<stem>-<index, 5 digits><ext>``, derived from
    ``filename``. Every file holds ``batch_per_file`` batches, except the last
    one, which holds whatever remains.

    Args:
        filename: template path; its extension must be ``.recordio``.
        batch_per_file: maximum number of batches per output file (>= 1).
        reader_creator: zero-argument callable returning an iterable of
            batches.
        feeder: object with a ``feed(batch)`` method returning a mapping from
            feed name to tensor, and a ``feed_names`` attribute.
        compressor: compressor passed to ``create_recordio_writer``.
        max_num_records: passed through to ``create_recordio_writer``.
        feed_order: names of the tensors to append for each batch, in order;
            defaults to ``feeder.feed_names``.

    Returns:
        Total number of batches written across all files.

    Raises:
        ValueError: if ``batch_per_file`` is smaller than 1.
        AssertionError: if ``filename`` does not end in ``.recordio``.
    """
    if batch_per_file < 1:
        raise ValueError("batch_per_file must be a positive integer")
    if feed_order is None:
        feed_order = feeder.feed_names
    f_name, f_ext = os.path.splitext(filename)
    assert (f_ext == ".recordio")

    def _write_file(batches, file_index):
        """Write ``batches`` to one numbered file; return how many were written."""
        out_name = "%s-%05d%s" % (f_name, file_index, f_ext)
        with fluid.recordio_writer.create_recordio_writer(
                out_name, compressor, max_num_records) as writer:
            for batch in batches:
                res = feeder.feed(batch)
                for each in feed_order:
                    writer.append_tensor(res[each])
                writer.complete_append_tensor()
        print("written file: ", out_name)
        return len(batches)

    pending = []
    f_idx = 0
    counter = 0
    for batch in reader_creator():
        pending.append(batch)
        # Flush on the buffer size rather than the running index, so the
        # first file does not receive one extra batch.
        if len(pending) == batch_per_file:
            counter += _write_file(pending, f_idx)
            pending = []
            f_idx += 1
    # Write the trailing partial chunk instead of silently dropping it.
    if pending:
        counter += _write_file(pending, f_idx)
    return counter
def prepare_imagenet(inpath, outpath, batch_size):
    """Convert the ImageNet training set under ``inpath`` to RecordIO files.

    The batched training reader is passed to
    ``convert_reader_to_recordio_files`` with ``<outpath>/imagenet.recordio``
    as the filename template and 10000 as the batches-per-file argument.
    """
    batched_reader = paddle.batch(
        imagenet_train(inpath), batch_size=batch_size)

    # Feed order is image first, then label.
    image_var = fluid.layers.data(name="image", shape=[3, 224, 224])
    label_var = fluid.layers.data(name="label", shape=[1], dtype='int64')
    feeder = fluid.DataFeeder(
        feed_list=[image_var, label_var], place=fluid.CPUPlace())

    recordio_path = os.path.join(outpath, "imagenet.recordio")
    convert_reader_to_recordio_files(recordio_path, 10000, batched_reader,
                                     feeder)

@ -2,6 +2,7 @@
# This script benchmarking the PaddlePaddle Fluid on # This script benchmarking the PaddlePaddle Fluid on
# single thread single GPU. # single thread single GPU.
mkdir -p logs
#export FLAGS_fraction_of_gpu_memory_to_use=0.0 #export FLAGS_fraction_of_gpu_memory_to_use=0.0
export CUDNN_PATH=/paddle/cudnn_v5 export CUDNN_PATH=/paddle/cudnn_v5
@ -35,71 +36,74 @@ nohup stdbuf -oL nvidia-smi \
--format=csv \ --format=csv \
--filename=mem.log \ --filename=mem.log \
-l 1 & -l 1 &
# mnist # mnist
# mnist gpu mnist 128 # mnist gpu mnist 128
FLAGS_benchmark=true stdbuf -oL python fluid/mnist.py \ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--model=mnist \
--device=GPU \ --device=GPU \
--batch_size=128 \ --batch_size=128 \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=500 \ --iterations=500 \
2>&1 | tee -a mnist_gpu_128.log 2>&1 | tee -a logs/mnist_gpu_128.log
# vgg16 # vgg16
# gpu cifar10 128 # gpu cifar10 128
FLAGS_benchmark=true stdbuf -oL python fluid/vgg16.py \ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--model=vgg16 \
--device=GPU \ --device=GPU \
--batch_size=128 \ --batch_size=128 \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
2>&1 | tee -a vgg16_gpu_128.log 2>&1 | tee -a logs/vgg16_gpu_128.log
# flowers gpu 128 # flowers gpu 128
FLAGS_benchmark=true stdbuf -oL python fluid/vgg16.py \ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--model=vgg16 \
--device=GPU \ --device=GPU \
--batch_size=32 \ --batch_size=32 \
--data_set=flowers \ --data_set=flowers \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
2>&1 | tee -a vgg16_gpu_flowers_32.log 2>&1 | tee -a logs/vgg16_gpu_flowers_32.log
# resnet50 # resnet50
# resnet50 gpu cifar10 128 # resnet50 gpu cifar10 128
FLAGS_benchmark=true stdbuf -oL python fluid/resnet50.py \ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--model=resnet \
--device=GPU \ --device=GPU \
--batch_size=128 \ --batch_size=128 \
--data_set=cifar10 \ --data_set=cifar10 \
--model=resnet_cifar10 \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
2>&1 | tee -a resnet50_gpu_128.log 2>&1 | tee -a logs/resnet50_gpu_128.log
# resnet50 gpu flowers 64 # resnet50 gpu flowers 64
FLAGS_benchmark=true stdbuf -oL python fluid/resnet50.py \ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--model=resnet \
--device=GPU \ --device=GPU \
--batch_size=64 \ --batch_size=64 \
--data_set=flowers \ --data_set=flowers \
--model=resnet_imagenet \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
2>&1 | tee -a resnet50_gpu_flowers_64.log 2>&1 | tee -a logs/resnet50_gpu_flowers_64.log
# lstm # lstm
# lstm gpu imdb 32 # tensorflow only support batch=32 # lstm gpu imdb 32 # tensorflow only support batch=32
FLAGS_benchmark=true stdbuf -oL python fluid/stacked_dynamic_lstm.py \ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--model=stacked_dynamic_lstm \
--device=GPU \ --device=GPU \
--batch_size=32 \ --batch_size=32 \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
--hidden_dim=512 \ 2>&1 | tee -a logs/lstm_gpu_32.log
--emb_dim=512 \
--crop_size=1500 \
2>&1 | tee -a lstm_gpu_32.log
# seq2seq # seq2seq
# seq2seq gpu wmb 128 # seq2seq gpu wmb 128
FLAGS_benchmark=true stdbuf -oL python fluid/machine_translation.py \ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--model=machine_translation \
--device=GPU \ --device=GPU \
--batch_size=128 \ --batch_size=128 \
--skip_batch_num=5 \ --skip_batch_num=5 \
--iterations=30 \ --iterations=30 \
2>&1 | tee -a lstm_gpu_128.log 2>&1 | tee -a logs/lstm_gpu_128.log

@ -0,0 +1,9 @@
#!/bin/bash
# Launch a local parameter-server benchmark of the resnet model:
# one CPU pserver process plus two GPU trainer processes, all in the background.

# Settings shared by the pserver and both trainers.
export PADDLE_PSERVER_PORT=7164
export PADDLE_PSERVER_IPS=127.0.0.1
export PADDLE_TRAINERS=2
export PADDLE_CURRENT_IP=127.0.0.1

# Parameter server.
PADDLE_TRAINING_ROLE=PSERVER PADDLE_TRAINER_ID=0 \
  python fluid_benchmark.py --model resnet --device CPU --update_method pserver --iterations=10000 &

# Presumably gives the pserver time to come up before trainers connect.
sleep 15

# Trainer 0 on GPUs 0,1.
CUDA_VISIBLE_DEVICES=0,1 PADDLE_TRAINING_ROLE=TRAINER PADDLE_TRAINER_ID=0 \
  python fluid_benchmark.py --model resnet --device GPU --update_method pserver --iterations=10000 --gpus 2 &

# Trainer 1 on GPUs 2,3.
CUDA_VISIBLE_DEVICES=2,3 PADDLE_TRAINING_ROLE=TRAINER PADDLE_TRAINER_ID=1 \
  python fluid_benchmark.py --model resnet --device GPU --update_method pserver --iterations=10000 --gpus 2 &

@ -1,3 +1,5 @@
#!/bin/bash
set -e set -e
function train() { function train() {

@ -1,3 +1,5 @@
#!/bin/bash
set -e set -e
function clock_to_seconds() { function clock_to_seconds() {

@ -1,3 +1,5 @@
#!/bin/bash
set -e set -e
function train() { function train() {

@ -1,3 +1,5 @@
#!/bin/bash
set -e set -e
function clock_to_seconds() { function clock_to_seconds() {

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save