Merge remote-tracking branch 'upstream/develop' into fix-4388

mobile_baidu
wangmeng28 8 years ago
commit e65960ede8

1
.gitignore vendored

@ -28,3 +28,4 @@ cmake_install.cmake
paddle/.timestamp paddle/.timestamp
python/paddlepaddle.egg-info/ python/paddlepaddle.egg-info/
paddle/pybind/pybind.h paddle/pybind/pybind.h
python/paddle/v2/framework/tests/tmp/*

@ -31,6 +31,3 @@
- id: go-fmt - id: go-fmt
types: types:
- go - go
- id: gometalinter
types:
- go

@ -30,6 +30,7 @@ addons:
- automake - automake
- libtool - libtool
- ccache - ccache
ssh_known_hosts: 52.76.173.135
before_install: before_install:
- if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
# Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
@ -42,6 +43,14 @@ script:
- | - |
timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout
RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi;
- |
if [[ "$JOB" != "build_doc" ]]; then exit 0; fi;
if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then exit 0; fi;
export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh
export DOCS_DIR=`pwd`
cd ..
curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $DOCS_DIR $DOCS_DIR/build/doc
notifications: notifications:
email: email:
on_success: change on_success: change

@ -86,6 +86,14 @@ if(ANDROID OR IOS)
"Disable MKLDNN when cross-compiling for Android and iOS" FORCE) "Disable MKLDNN when cross-compiling for Android and iOS" FORCE)
set(WITH_MKLML OFF CACHE STRING set(WITH_MKLML OFF CACHE STRING
"Disable MKLML package when cross-compiling for Android and iOS" FORCE) "Disable MKLML package when cross-compiling for Android and iOS" FORCE)
# Compile PaddlePaddle mobile inference library
if (NOT WITH_C_API)
set(WITH_C_API ON CACHE STRING
"Always compile the C_API when cross-compiling for Android and iOS" FORCE)
endif()
set(MOBILE_INFERENCE ON)
add_definitions(-DPADDLE_MOBILE_INFERENCE)
endif() endif()
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
@ -97,6 +105,12 @@ if (WITH_C_API AND WITH_PYTHON)
"different Python interpreter from compiling.") "different Python interpreter from compiling.")
endif() endif()
if(MOBILE_INFERENCE)
set(THIRD_PARTY_BUILD_TYPE MinSizeRel)
else()
set(THIRD_PARTY_BUILD_TYPE Release)
endif()
######################################################################################## ########################################################################################
include(external/mklml) # download mklml package include(external/mklml) # download mklml package
@ -113,6 +127,7 @@ include(external/warpctc) # download, build, install warpctc
include(external/any) # download libn::any include(external/any) # download libn::any
include(external/eigen) # download eigen3 include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11 include(external/pybind11) # download pybind11
include(external/nccl)
include(cudnn) # set cudnn libraries, must before configure include(cudnn) # set cudnn libraries, must before configure
include(configure) # add paddle env configuration include(configure) # add paddle env configuration
@ -145,7 +160,7 @@ set(EXTERNAL_LIBS
if(WITH_GPU) if(WITH_GPU)
list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY}) list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO) if(NOT WITH_DSO)
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY}) list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
endif(NOT WITH_DSO) endif(NOT WITH_DSO)
endif(WITH_GPU) endif(WITH_GPU)
@ -160,9 +175,11 @@ endif(USE_NNPACK)
add_subdirectory(proto) add_subdirectory(proto)
# "add_subdirectory(go)" should be placed after the following loine, if(NOT MOBILE_INFERENCE)
# because it depends on paddle/optimizer. # "add_subdirectory(go)" should be placed after the following loine,
add_subdirectory(paddle/optimizer) # because it depends on paddle/optimizer.
add_subdirectory(paddle/optimizer)
endif()
# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
# placed after this block, because they depends on it. # placed after this block, because they depends on it.

@ -1 +1,157 @@
./doc/howto/dev/contribute_to_paddle_en.md # Contribute Code
We sincerely appreciate your contribution. This document explains our workflow and work style.
## Workflow
PaddlePaddle uses this [Git branching model](http://nvie.com/posts/a-successful-git-branching-model/). The following steps guide usual contributions.
1. Fork
Our development community has been growing fastly; it doesn't make sense for everyone to write into the official repo. So, please file Pull Requests from your fork. To make a fork, just head over to the GitHub page and click the ["Fork" button](https://help.github.com/articles/fork-a-repo/).
1. Clone
To make a copy of your fork to your local computers, please run
```bash
git clone https://github.com/your-github-account/paddle
cd paddle
```
1. Create the local feature branch
For daily works like adding a new feature or fixing a bug, please open your feature branch before coding:
```bash
git checkout -b my-cool-stuff
```
1. Commit
Before issuing your first `git commit` command, please install [`pre-commit`](http://pre-commit.com/) by running the following commands:
```bash
pip install pre-commit
pre-commit install
```
Our pre-commit configuration requires clang-format 3.8 for auto-formating C/C++ code and yapf for Python.
Once installed, `pre-commit` checks the style of code and documentation in every commit. We will see something like the following when you run `git commit`:
```
➜ git commit
CRLF end-lines remover...............................(no files to check)Skipped
yapf.................................................(no files to check)Skipped
Check for added large files..............................................Passed
Check for merge conflicts................................................Passed
Check for broken symlinks................................................Passed
Detect Private Key...................................(no files to check)Skipped
Fix End of Files.....................................(no files to check)Skipped
clang-formater.......................................(no files to check)Skipped
[my-cool-stuff c703c041] add test file
1 file changed, 0 insertions(+), 0 deletions(-)
create mode 100644 233
```
1. Build and test
Users can build PaddlePaddle natively on Linux and Mac OS X. But to unify the building environment and to make it easy for debugging, the recommended way is [using Docker](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/build_en.md).
1. Keep pulling
An experienced Git user pulls from the official repo often -- daily or even hourly, so they notice conflicts with others work early, and it's easier to resolve smaller conflicts.
```bash
git remote add upstream https://github.com/PaddlePaddle/Paddle
git pull upstream develop
```
1. Push and file a pull request
You can "push" your local work into your forked repo:
```bash
git push origin my-cool-stuff
```
The push allows you to create a pull request, requesting owners of this [official repo](https://github.com/PaddlePaddle/Paddle) to pull your change into the official one.
To create a pull request, please follow [these steps](https://help.github.com/articles/creating-a-pull-request/).
If your change is for fixing an issue, please write ["Fixes <issue-URL>"](https://help.github.com/articles/closing-issues-using-keywords/) in the description section of your pull request. Github would close the issue when the owners merge your pull request.
Please remember to specify some reviewers for your pull request. If you don't know who are the right ones, please follow Github's recommendation.
1. Delete local and remote branches
To keep your local workspace and your fork clean, you might want to remove merged branches:
```bash
git push origin :my-cool-stuff
git checkout develop
git pull upstream develop
git branch -d my-cool-stuff
```
### Code Review
- Please feel free to ping your reviewers by sending them the URL of your pull request via IM or email. Please do this after your pull request passes the CI.
- Please answer reviewers' every comment. If you are to follow the comment, please write "Done"; please give a reason otherwise.
- If you don't want your reviewers to get overwhelmed by email notifications, you might reply their comments by [in a batch](https://help.github.com/articles/reviewing-proposed-changes-in-a-pull-request/).
- Reduce the unnecessary commits. Some developers commit often. It is recommended to append a sequence of small changes into one commit by running `git commit --amend` instead of `git commit`.
## Coding Standard
### Code Style
Our C/C++ code follows the [Google style guide](http://google.github.io/styleguide/cppguide.html).
Our Python code follows the [PEP8 style guide](https://www.python.org/dev/peps/pep-0008/).
Our build process helps to check the code style. In [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/paddle/scripts/docker/build.sh#L42), the entry point of our [builder Docker image](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/Dockerfile#L88), the CMake argument `WITH_STYLE_CHECK` is set to `ON` by default. This flag is on
Please install pre-commit, which automatically reformat the changes to C/C++ and Python code whenever we run `git commit`. To check the whole codebase, we can run the command `pre-commit run -a`, as in the [`check_style.sh` file](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/paddle/scripts/travis/check_style.sh#L30), which is invoked by [our Travis CI configuration](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/.travis.yml#L43).
### Unit Tests
Please remember to add related unit tests.
- For C/C++ code, please follow [`google-test` Primer](https://github.com/google/googletest/blob/master/googletest/docs/Primer.md).
- For Python code, please use [Python's standard `unittest` package](http://pythontesting.net/framework/unittest/unittest-introduction/).
### Writing Logs
We use [glog](https://github.com/google/glog) for logging in our C/C++ code.
For general information, please use `LOG`. For debug information, please use [`VLOG`](http://htmlpreview.github.io/?https://github.com/google/glog/blob/master/doc/glog.html#verbose). The reason is at [here](https://groups.google.com/a/chromium.org/d/msg/chromium-dev/3NDNd1KzXeY/AZKMMx37fdQJ).
`VLOG` requires a *verbose level* parameter. For example:
```c++
VLOG(3) << "Operator FC is taking " << num_inputs << "inputs."
```
When we run a PaddlePaddle application or test, we can specify a verbose threshold. For example:
```bash
GLOG_vmodule=buddy_allocator=2 \
GLOG_v=10 \
python \
../python/paddle/v2/framework/tests/test_recurrent_op.py
```
This will enable VLOG messages generated by `buddy_allocator.{h,cc}` and in the verbose range of 0 to 3, so you will see above example VLOG message, which is in level 3. This suggests that we output overall messages in lower verbose levels, so they display with higher probability. When coding C++, please follow the verbose level convention as follows:
- verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework)
- verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)
- verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform)
- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/math)

@ -22,7 +22,7 @@ COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y \ apt-get install -y \
git python-pip python-dev openssh-server bison \ git python-pip python-dev openssh-server bison libnccl-dev \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \ curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-matplotlib gcc-4.8 g++-4.8 \ python-matplotlib gcc-4.8 g++-4.8 \

@ -0,0 +1,48 @@
# Benchmark
Machine:
- Server
- Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, 2 Sockets, 20 Cores per socket
- Laptop
- DELL XPS15-9560-R1745: i7-7700HQ 8G 256GSSD
- i5 MacBook Pro (Retina, 13-inch, Early 2015)
- Desktop
- i7-6700k
System: CentOS release 6.3 (Final), Docker 1.12.1.
PaddlePaddle: paddlepaddle/paddle:latest (TODO: will rerun after 0.11.0)
- MKL-DNN tag v0.10
- MKLML 2018.0.20170720
- OpenBLAS v0.2.20
On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively.
## Benchmark Model
### Server
Test on batch size 64, 128, 256 on Intel(R) Xeon(R) Gold 6148M CPU @ 2.40GHz
Input image size - 3 * 224 * 224, Time: images/second
- VGG-19
| BatchSize | 64 | 128 | 256 |
|--------------|-------| -----| --------|
| OpenBLAS | 7.82 | 8.62 | 10.34 |
| MKLML | 11.02 | 12.86 | 15.33 |
| MKL-DNN | 27.69 | 28.8 | 29.27 |
chart on batch size 128
TBD
- ResNet
- GoogLeNet
### Laptop
TBD
### Desktop
TBD

@ -24,6 +24,10 @@ if(WITH_DOUBLE)
add_definitions(-DPADDLE_TYPE_DOUBLE) add_definitions(-DPADDLE_TYPE_DOUBLE)
endif(WITH_DOUBLE) endif(WITH_DOUBLE)
if(WITH_TESTING)
add_definitions(-DPADDLE_WITH_TESTING)
endif(WITH_TESTING)
if(NOT WITH_TIMER) if(NOT WITH_TIMER)
add_definitions(-DPADDLE_DISABLE_TIMER) add_definitions(-DPADDLE_DISABLE_TIMER)
endif(NOT WITH_TIMER) endif(NOT WITH_TIMER)
@ -49,19 +53,20 @@ if(NOT WITH_GOLANG)
endif(NOT WITH_GOLANG) endif(NOT WITH_GOLANG)
if(NOT WITH_GPU) if(NOT WITH_GPU)
add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC) add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu) list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else() else()
add_definitions(-DPADDLE_WITH_CUDA)
FIND_PACKAGE(CUDA REQUIRED) FIND_PACKAGE(CUDA REQUIRED)
if(${CUDA_VERSION_MAJOR} VERSION_LESS 7) if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile") message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile")
endif() endif()
if(NOT CUDNN_FOUND) if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle need cudnn to compile") message(FATAL_ERROR "Paddle needs cudnn to compile")
endif() endif()
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}") set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")

@ -8,7 +8,7 @@ ExternalProject_Add(
extern_eigen3 extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" GIT_REPOSITORY "https://github.com/RLovelett/eigen.git"
GIT_TAG "master" GIT_TAG 70661066beef694cadf6c304d0d07e0758825c10
PREFIX ${EIGEN_SOURCE_DIR} PREFIX ${EIGEN_SOURCE_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""

@ -36,6 +36,7 @@ ExternalProject_Add(
# change this back to the official Github repo once my PR is # change this back to the official Github repo once my PR is
# merged. # merged.
GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git" GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git"
GIT_TAG 986964c07427ecb9cdb5bd73f73ebbd40e54dadb
PREFIX ${GFLAGS_SOURCES_DIR} PREFIX ${GFLAGS_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@ -45,11 +46,11 @@ ExternalProject_Add(
-DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_TESTING=OFF -DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)

@ -31,6 +31,7 @@ ExternalProject_Add(
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS gflags DEPENDS gflags
GIT_REPOSITORY "https://github.com/google/glog.git" GIT_REPOSITORY "https://github.com/google/glog.git"
GIT_TAG v0.3.5
PREFIX ${GLOG_SOURCES_DIR} PREFIX ${GLOG_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@ -43,12 +44,12 @@ ExternalProject_Add(
-DWITH_GFLAGS=ON -DWITH_GFLAGS=ON
-Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
-DBUILD_TESTING=OFF -DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)

@ -56,11 +56,11 @@ IF(WITH_TESTING)
-DBUILD_GMOCK=ON -DBUILD_GMOCK=ON
-Dgtest_disable_pthreads=ON -Dgtest_disable_pthreads=ON
-Dgtest_force_shared_crt=ON -Dgtest_force_shared_crt=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL)

@ -0,0 +1,49 @@
include(ExternalProject)
set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)
include_directories(${NCCL_SOURCE_DIR}/src/extern_nccl/src)
if(WITH_DSO)
# If we use DSO, we do not build nccl, just download the dependencies
set(NCCL_BUILD_COMMAND "")
set(NCCL_INSTALL_COMMAND "")
set(NCCL_INSTALL_DIR "")
else()
# otherwise, we build nccl and link it.
set(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
# Note: cuda 8.0 is needed to make nccl
# When cuda is not installed on the system directory, need to set CUDA_HOME to your cuda root
set(NCCL_BUILD_COMMAND "make -j 8")
set(NCCL_INSTALL_COMMAND "make install PREFIX=${NCCL_INSTALL_DIR}")
endif()
ExternalProject_Add(
extern_nccl
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/NVIDIA/nccl.git"
GIT_TAG "v1.3.4-1"
PREFIX "${NCCL_SOURCE_DIR}"
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND "${NCCL_BUILD_COMMAND}"
INSTALL_COMMAND "${NCCL_INSTALL_COMMAND}"
INSTALL_DIR "${NCCL_INSTALL_DIR}"
TEST_COMMAND ""
)
if(WITH_DSO)
if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_nccl_dummy.c)
file(WRITE ${dummyfile} "const char * dummy_nccl = \"${dummyfile}\";")
add_library(nccl STATIC ${dummyfile})
else()
add_library(nccl INTERFACE)
endif()
else()
add_library(nccl STATIC IMPORTED GLOBAL)
set_property(TARGET nccl PROPERTY IMPORTED_LOCATION
${NCCL_INSTALL_DIR}/lib/libnccl_static.a)
endif()
add_dependencies(nccl extern_nccl)

@ -191,12 +191,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
${OPTIONAL_ARGS} ${OPTIONAL_ARGS}
-Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_TESTS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_INSTALL_LIBDIR=lib
CMAKE_CACHE_ARGS CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
${OPTIONAL_CACHE_ARGS} ${OPTIONAL_CACHE_ARGS}

@ -35,6 +35,7 @@ ExternalProject_Add(
extern_warpctc extern_warpctc
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git" GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git"
GIT_TAG b63a0644654a3e0ed624c85a1767bc8193aead09
PREFIX ${WARPCTC_SOURCES_DIR} PREFIX ${WARPCTC_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@ -48,9 +49,9 @@ ExternalProject_Add(
-DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
-DBUILD_SHARED=ON -DBUILD_SHARED=ON
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
) )

@ -42,11 +42,11 @@ ExternalProject_Add(
-DBUILD_SHARED_LIBS=OFF -DBUILD_SHARED_LIBS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_MACOSX_RPATH=ON -DCMAKE_MACOSX_RPATH=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
LIST(APPEND external_project_dependencies zlib) LIST(APPEND external_project_dependencies zlib)

@ -389,13 +389,60 @@ function(go_test TARGET_NAME)
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endfunction(go_test) endfunction(go_test)
# Modification of standard 'protobuf_generate_cpp()' with protobuf-lite support
# Usage:
# paddle_protobuf_generate_cpp(<proto_srcs> <proto_hdrs> <proto_files>)
function(paddle_protobuf_generate_cpp SRCS HDRS)
if(NOT ARGN)
message(SEND_ERROR "Error: paddle_protobuf_generate_cpp() called without any proto files")
return()
endif()
set(${SRCS})
set(${HDRS})
if (MOBILE_INFERENCE)
set(EXTRA_FLAG "lite:")
else()
set(EXTRA_FLAG "")
endif()
foreach(FIL ${ARGN})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(FIL_WE ${FIL} NAME_WE)
set(_protobuf_protoc_src "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
set(_protobuf_protoc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
list(APPEND ${SRCS} "${_protobuf_protoc_src}")
list(APPEND ${HDRS} "${_protobuf_protoc_hdr}")
add_custom_command(
OUTPUT "${_protobuf_protoc_src}"
"${_protobuf_protoc_hdr}"
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
-I${CMAKE_CURRENT_SOURCE_DIR}
--cpp_out "${EXTRA_FLAG}${CMAKE_CURRENT_BINARY_DIR}" ${ABS_FIL}
DEPENDS ${ABS_FIL} protoc
COMMENT "Running C++ protocol buffer compiler on ${FIL}"
VERBATIM )
endforeach()
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
set(${SRCS} ${${SRCS}} PARENT_SCOPE)
set(${HDRS} ${${HDRS}} PARENT_SCOPE)
endfunction()
function(proto_library TARGET_NAME) function(proto_library TARGET_NAME)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(proto_srcs) set(proto_srcs)
set(proto_hdrs) set(proto_hdrs)
protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS}) paddle_protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS})
cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf) cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf)
endfunction() endfunction()

@ -73,25 +73,43 @@ function(link_paddle_exe TARGET_NAME)
generate_rdma_links() generate_rdma_links()
endif() endif()
target_circle_link_libraries(${TARGET_NAME} if(MOBILE_INFERENCE)
ARCHIVE_START target_circle_link_libraries(${TARGET_NAME}
paddle_gserver ARCHIVE_START
paddle_function paddle_gserver
ARCHIVE_END paddle_function
paddle_pserver ARCHIVE_END
paddle_trainer_lib paddle_math
paddle_network paddle_utils
paddle_math paddle_parameter
paddle_utils paddle_proto
paddle_parameter paddle_cuda
paddle_proto ${EXTERNAL_LIBS}
paddle_cuda ${CMAKE_THREAD_LIBS_INIT}
paddle_optimizer ${CMAKE_DL_LIBS}
${EXTERNAL_LIBS} ${RDMA_LD_FLAGS}
${CMAKE_THREAD_LIBS_INIT} ${RDMA_LIBS})
${CMAKE_DL_LIBS} else()
${RDMA_LD_FLAGS} target_circle_link_libraries(${TARGET_NAME}
${RDMA_LIBS}) ARCHIVE_START
paddle_gserver
paddle_function
ARCHIVE_END
paddle_pserver
paddle_trainer_lib
paddle_network
paddle_math
paddle_utils
paddle_parameter
paddle_proto
paddle_cuda
paddle_optimizer
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CMAKE_DL_LIBS}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
endif()
if(ANDROID) if(ANDROID)
target_link_libraries(${TARGET_NAME} log) target_link_libraries(${TARGET_NAME} log)

@ -21,7 +21,7 @@ Model Config API
trainer_config_helpers/optimizers.rst trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst trainer_config_helpers/evaluators.rst

@ -345,6 +345,11 @@ clip
.. autoclass:: paddle.v2.layer.clip .. autoclass:: paddle.v2.layer.clip
:noindex: :noindex:
resize
------
.. autoclass:: paddle.v2.layer.resize
:noindex:
slope_intercept slope_intercept
--------------- ---------------
.. autoclass:: paddle.v2.layer.slope_intercept .. autoclass:: paddle.v2.layer.slope_intercept

@ -125,3 +125,8 @@ simple_attention
:members: simple_attention :members: simple_attention
:noindex: :noindex:
dot_product_attention
---------------------
.. automodule:: paddle.v2.networks
:members: dot_product_attention
:noindex:

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

@ -0,0 +1,23 @@
# Executor Design Doc
## Motivation
We use executor to do the runtime evaluation of a `ProgramDesc`.
## Overview
An executor takes a `ProgramDesc`, a `block_id` and a `Scope`. The `ProgramDesc` is a list of blocks and each block contains the protobuf definition of all the parameters and operators. The `block_id` specifies the entrance block. And the `Scope` is the container of all the variable instance, which is persistent throughout different runs.
### What does executor do?
It evaluates all the operators in the `block_id`th block of a `ProgramDesc`.
### What does executor NOT do?
It does not do runtime optimization, meaning intelligently parse the dependency of each op a choose which one to be run and in which order they should be run.
It does not do graph partitioning, meaning dividing the `ProgramDesc` into several small pieces and executing them on different devices.
## Implementation
`Executor` evaluates a `ProgramDesc`. Essentially, it instantiates Variables and Operators, then run all the operators in sequence. [[code]](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.cc)

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save