remove conflict

mobile_baidu
chengduoZH 7 years ago
commit ba7db29df1

@ -30,6 +30,7 @@ addons:
- automake
- libtool
- ccache
ssh_known_hosts: 52.76.173.135
before_install:
- if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
# Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
@ -42,6 +43,14 @@ script:
- |
timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout
RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi;
- |
if [[ "$JOB" != "build_doc" ]]; then exit 0; fi;
if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then exit 0; fi;
export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh
export DOCS_DIR=`pwd`
cd ..
curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $DOCS_DIR $DOCS_DIR/build/doc
notifications:
email:
on_success: change

@ -1 +1,157 @@
./doc/howto/dev/contribute_to_paddle_en.md
# Contribute Code
We sincerely appreciate your contribution. This document explains our workflow and work style.
## Workflow
PaddlePaddle uses this [Git branching model](http://nvie.com/posts/a-successful-git-branching-model/). The following steps guide usual contributions.
1. Fork
Our development community has been growing fastly; it doesn't make sense for everyone to write into the official repo. So, please file Pull Requests from your fork. To make a fork, just head over to the GitHub page and click the ["Fork" button](https://help.github.com/articles/fork-a-repo/).
1. Clone
To make a copy of your fork to your local computers, please run
```bash
git clone https://github.com/your-github-account/paddle
cd paddle
```
1. Create the local feature branch
For daily works like adding a new feature or fixing a bug, please open your feature branch before coding:
```bash
git checkout -b my-cool-stuff
```
1. Commit
Before issuing your first `git commit` command, please install [`pre-commit`](http://pre-commit.com/) by running the following commands:
```bash
pip install pre-commit
pre-commit install
```
Our pre-commit configuration requires clang-format 3.8 for auto-formating C/C++ code and yapf for Python.
Once installed, `pre-commit` checks the style of code and documentation in every commit. We will see something like the following when you run `git commit`:
```
➜ git commit
CRLF end-lines remover...............................(no files to check)Skipped
yapf.................................................(no files to check)Skipped
Check for added large files..............................................Passed
Check for merge conflicts................................................Passed
Check for broken symlinks................................................Passed
Detect Private Key...................................(no files to check)Skipped
Fix End of Files.....................................(no files to check)Skipped
clang-formater.......................................(no files to check)Skipped
[my-cool-stuff c703c041] add test file
1 file changed, 0 insertions(+), 0 deletions(-)
create mode 100644 233
```
1. Build and test
Users can build PaddlePaddle natively on Linux and Mac OS X. But to unify the building environment and to make it easy for debugging, the recommended way is [using Docker](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/build_en.md).
1. Keep pulling
An experienced Git user pulls from the official repo often -- daily or even hourly, so they notice conflicts with others work early, and it's easier to resolve smaller conflicts.
```bash
git remote add upstream https://github.com/PaddlePaddle/Paddle
git pull upstream develop
```
1. Push and file a pull request
You can "push" your local work into your forked repo:
```bash
git push origin my-cool-stuff
```
The push allows you to create a pull request, requesting owners of this [official repo](https://github.com/PaddlePaddle/Paddle) to pull your change into the official one.
To create a pull request, please follow [these steps](https://help.github.com/articles/creating-a-pull-request/).
If your change is for fixing an issue, please write ["Fixes <issue-URL>"](https://help.github.com/articles/closing-issues-using-keywords/) in the description section of your pull request. Github would close the issue when the owners merge your pull request.
Please remember to specify some reviewers for your pull request. If you don't know who are the right ones, please follow Github's recommendation.
1. Delete local and remote branches
To keep your local workspace and your fork clean, you might want to remove merged branches:
```bash
git push origin :my-cool-stuff
git checkout develop
git pull upstream develop
git branch -d my-cool-stuff
```
### Code Review
- Please feel free to ping your reviewers by sending them the URL of your pull request via IM or email. Please do this after your pull request passes the CI.
- Please answer reviewers' every comment. If you are to follow the comment, please write "Done"; please give a reason otherwise.
- If you don't want your reviewers to get overwhelmed by email notifications, you might reply their comments by [in a batch](https://help.github.com/articles/reviewing-proposed-changes-in-a-pull-request/).
- Reduce the unnecessary commits. Some developers commit often. It is recommended to append a sequence of small changes into one commit by running `git commit --amend` instead of `git commit`.
## Coding Standard
### Code Style
Our C/C++ code follows the [Google style guide](http://google.github.io/styleguide/cppguide.html).
Our Python code follows the [PEP8 style guide](https://www.python.org/dev/peps/pep-0008/).
Our build process helps to check the code style. In [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/paddle/scripts/docker/build.sh#L42), the entry point of our [builder Docker image](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/Dockerfile#L88), the CMake argument `WITH_STYLE_CHECK` is set to `ON` by default. This flag is on
Please install pre-commit, which automatically reformat the changes to C/C++ and Python code whenever we run `git commit`. To check the whole codebase, we can run the command `pre-commit run -a`, as in the [`check_style.sh` file](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/paddle/scripts/travis/check_style.sh#L30), which is invoked by [our Travis CI configuration](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/.travis.yml#L43).
### Unit Tests
Please remember to add related unit tests.
- For C/C++ code, please follow [`google-test` Primer](https://github.com/google/googletest/blob/master/googletest/docs/Primer.md).
- For Python code, please use [Python's standard `unittest` package](http://pythontesting.net/framework/unittest/unittest-introduction/).
### Writing Logs
We use [glog](https://github.com/google/glog) for logging in our C/C++ code.
For general information, please use `LOG`. For debug information, please use [`VLOG`](http://htmlpreview.github.io/?https://github.com/google/glog/blob/master/doc/glog.html#verbose). The reason is at [here](https://groups.google.com/a/chromium.org/d/msg/chromium-dev/3NDNd1KzXeY/AZKMMx37fdQJ).
`VLOG` requires a *verbose level* parameter. For example:
```c++
VLOG(3) << "Operator FC is taking " << num_inputs << "inputs."
```
When we run a PaddlePaddle application or test, we can specify a verbose threshold. For example:
```bash
GLOG_vmodule=buddy_allocator=2 \
GLOG_v=10 \
python \
../python/paddle/v2/framework/tests/test_recurrent_op.py
```
This will enable VLOG messages generated by `buddy_allocator.{h,cc}` and in the verbose range of 0 to 3, so you will see above example VLOG message, which is in level 3. This suggests that we output overall messages in lower verbose levels, so they display with higher probability. When coding C++, please follow the verbose level convention as follows:
- verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework)
- verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)
- verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform)
- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/math)

@ -0,0 +1,48 @@
# Benchmark
Machine:
- Server
- Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, 2 Sockets, 20 Cores per socket
- Laptop
- DELL XPS15-9560-R1745: i7-7700HQ 8G 256GSSD
- i5 MacBook Pro (Retina, 13-inch, Early 2015)
- Desktop
- i7-6700k
System: CentOS release 6.3 (Final), Docker 1.12.1.
PaddlePaddle: paddlepaddle/paddle:latest (TODO: will rerun after 0.11.0)
- MKL-DNN tag v0.10
- MKLML 2018.0.20170720
- OpenBLAS v0.2.20
On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively.
## Benchmark Model
### Server
Test on batch size 64, 128, 256 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
Input image size - 3 * 224 * 224, Time: images/second
- VGG-19
| BatchSize | 64 | 128 | 256 |
|--------------|-------| -----| --------|
| OpenBLAS | 7.82 | 8.62 | 10.34 |
| MKLML | 11.02 | 12.86 | 15.33 |
| MKL-DNN | 27.69 | 28.8 | 29.27 |
chart on batch size 128
TBD
- ResNet
- GoogLeNet
### Laptop
TBD
### Desktop
TBD

Binary file not shown.

After

Width:  |  Height:  |  Size: 620 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 156 B

@ -0,0 +1,72 @@
# Averaging Parameter in PaddlePaddle
## Why Averaging
In a large scale machine learning setup where the size of the training data is huge, it could take us a large number of iterations over the training data before we can achieve the optimal values of parameters of our model. Looking at the problem setup, it is desirable if we can obtain the optimal values of parameters by going through the data in as few passes as we can.
Polyak and Juditsky (1992) showed that the test performance of simple average of parameters obtained by Stochastic Gradient Descent (SGD) is as good as that of parameter values that are obtained by training the model over and over again, over the training dataset.
Hence, to accelerate the speed of Stochastic Gradient Descent, Averaged Stochastic Gradient Descent (ASGD) was proposed in Polyak and Juditsky (1992). For ASGD, the running average of parameters obtained by SGD, is used as the estimator for <img src="./images/theta_star.gif"/><br/> . The averaging is done as follows:
<img src="./images/asgd.gif" align="center"/><br/>
We propose averaging for any optimizer similar to how ASGD performs it, as mentioned above.
### How to perform Parameter Averaging in PaddlePaddle
Parameter Averaging in PaddlePaddle works in the following way during training :
1. It will take in an instance of a normal optimizer as an input, e.g. RMSPropOptimizer
2. The optimizer itself is responsible for updating the parameters.
3. The ParameterAverageOptimizer maintains a separate copy of the parameters for itself:
1. In concept, the values of this copy are the average of the values of the parameters in the most recent N batches.
2. However, saving all the N instances of the parameters in memory is not feasible.
3. Therefore, an approximation algorithm is used.
Hence, overall we have have two copies of the parameters: one for the optimizer itself, and one for the ParameterAverageOptimizer. The former should be used in back propagation, while the latter should be used during testing and should be saved.
During the testing/ saving the model phase, we perform the following steps:
1. Perform the delayed operations.
2. Save current values of the parameters to a temporary variable.
3. Replace the values of the parameters with the averaged values.
4. Perform testing and/or save the parameters.
5. Restore the values of the parameters once done.
### How to implement Averaging of Parameter in PaddlePaddle
We can add the ParameterAverageOptimizer op to the graph through Python API. Using this approach, we manually add this op to the graph and direct the output of the optimizer op to this op during training.
**Advantages**:
- Allows for greater flexibility to the users of PaddlePaddle. Using this approach, the users can plug different optimizers into ParameterAverageOptimizer by passing in the optimizer to the op.
- Makes it easy for the users to customize and extend the framework.
**Disadvantages**:
- Implementation requires re-writing the averaging methodology in Python.
### Low-Level implementation
In the new design, we propose to create a new operation for averaging parameter updates (ParameterAverageOptimizer). For now, we can add an op that takes in the following as input:
- the optimizer
- the window_size to keep the updates
The ParameterAverageOptimizer op can be like any other operator with its own CPU/GPU implementation either using Eigen or separate CPU and GPU kernels. As the initial implementation, we can implement the kernel using Eigen following the abstraction pattern implemented for [Operators](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/rmsprop_op.h). We also want to support the case when the Trainer/Optimizer runs on the GPU while ParameterAverageOptimizer runs on a CPU.
The idea of building an op for averaging is in sync with the refactored PaddlePaddle philosophy of using operators to represent any computation unit. The way the op will be added to the computation graph will be decided by the [layer functions](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md#layer-function) in Python API.
### Python API implementation for ParameterAverageOptimizer
Based on Polyak and Juditsky (1992), we can generalize the averaging of updates to any optimizer. The input to the op would be the following:
- Any optimizer (RMSProp , AdaGrad etc.)
- A window size. The op keeps accumulating updated parameter values over a window of N batches and takes an average. Move the averaged value to a buffer when window is full to avoid loss of precision.
Using the ParameterAverageOptimizer op, any user can add the operation to their computation graphs. However, this will require a lot of lines of code and we should design Python APIs that support averaging. As per the PaddlePaddle [Python API design](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md), the layer functions are responsible for creating operators, operator parameters and variables. Since ParameterAverageOptimizer will be an operator, it makes sense to create it in the layer functions.
We will have a wrapper written in Python that will support the functionality and implement the actual core computation in C++ core as we have done for other [Optimizers](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/rmsprop_op.cc)
#### Creation of the ParameterAverageOptimizer operator
There are two ways for creating the ParameterAverageOptimizer op:
1. We create the op immediately while building the computation graph.
2. We add the op in a lazy manner, just before the backward pass, similar to the way the optimization ops are added.
The proposal is to add the op immediately while building the computation graph.
#### High-level API
In PaddlePaddle Python API, users will primarily rely on [layer functions](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md#layer-function) to create neural network layers. Hence, we also need to provide parameter average functionality in layer functions.

@ -75,7 +75,7 @@ PaddlePaddle目前支持8种learning_rate_schedule这8种learning_rate_schedu
optimizer = paddle.optimizer.Adam(
learning_rate=1e-3,
learning_rate_schedule="manual",
learning_rate_schedule="pass_manual",
learning_rate_args="1:1.0,2:0.9,3:0.8",)
在该示例中当已训练pass数小于等于1时学习率为 :code:`1e-3 * 1.0`当已训练pass数大于1小于等于2时学习率为 :code:`1e-3 * 0.9`当已训练pass数大于2时学习率为 :code:`1e-3 * 0.8`

@ -0,0 +1,153 @@
# Build PaddlePaddle for Android
There are two approaches to build PaddlePaddle for Android: using Docker and on Linux without Docker.
## Cross-Compiling Using Docker
Docker-based cross-compiling is the recommended approach because Docker runs on all major operating systems, including Linux, Mac OS X, and Windows.
### Build the Docker Image
The following steps pack all the tools that we need to build PaddlePaddle into a Docker image.
```bash
$ git clone https://github.com/PaddlePaddle/Paddle.git
$ cd Paddle
$ docker build -t paddle:dev-android . -f Dockerfile.android
```
### Build the Inference Library
We can run the Docker image we just created to build the inference library of PaddlePaddle for Android using the command below:
```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_API=21" paddle:dev-android
```
The Docker image accepts two arguments `ANDROID_ABI` and `ANDROID_API`:
| Argument | Optional Values | Default |
|-----------------|-------------------------|---------|
|`ANDROID_ABI` |`armeabi-v7a, arm64-v8a` | `armeabi-v7a` |
|`ANDROID_API` |`>= 21` | `21` |
The ARM-64 architecture (`arm64-v8a`) requires at least level 21 of Android API.
The default entry-point of the Docker image, [`paddle/scripts/docker/build_android.sh`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build_android.sh) generates the [Android cross-compiling standalone toolchain](https://developer.android.com/ndk/guides/standalone_toolchain.html) based on the argument: `ANDROID_ABI` or `ANDROID_API`. For information about other configuration arguments, please continue reading.
The above command generates and outputs the inference library in `$PWD/install_android` and puts third-party libraries in `$PWD/install_android/third_party`.
## Cross-Compiling on Linux
The Linux-base approach to cross-compile is to run steps in `Dockerfile.android` manually on a Linux x64 computer.
### Setup the Environment
To build for Android's, we need [Android NDK](
https://developer.android.com/ndk/downloads/index.html):
```bash
wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip
unzip -q android-ndk-r14b-linux-x86_64.zip
```
Android NDK includes everything we need to build the [*standalone toolchain*](https://developer.android.com/ndk/guides/standalone_toolchain.html), which in then used to build PaddlePaddle for Android. (We plan to remove the intermediate stage of building the standalone toolchain in the near future.)
- To build the standalone toolchain for `armeabi-v7a` and Android API level 21:
```bash
your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
--arch=arm --platform=android-21 --install-dir=your/path/to/arm_standalone_toolchain
```
The generated standalone toolchain will be in `your/path/to/arm_standalone_toolchain`.
- To build the standalone toolchain for `arm64-v8a` and Android API level 21:
```bash
your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
--arch=arm64 --platform=android-21 --install-dir=your/path/to/arm64_standalone_toolchain
```
The generated standalone toolchain will be in `your/path/to/arm64_standalone_toolchain`.
**Please be aware that the minimum level of Android API required by PaddlePaddle is 21.**
### Cross-Compiling Arguments
CMake supports [choosing the toolchain](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling). PaddlePaddle provides [`android.cmake`](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/android.cmake), which configures the Android cross-compiling toolchain for CMake. `android.cmake` is not required for CMake >= 3.7, which support Android cross-compiling. PaddlePaddle detects the CMake version, for those newer than 3.7, it uses [the official version](https://cmake.org/cmake/help/v3.7/manual/cmake-toolchains.7.html#cross-compiling).
Some other CMake arguments you need to know:
- `CMAKE_SYSTEM_NAME` must be `Android`. This tells PaddlePaddle's CMake system to cross-compile third-party dependencies. This also changes some other CMake arguments like `WITH_GPU=OFF`, `WITH_AVX=OFF`, `WITH_PYTHON=OFF`, and `WITH_RDMA=OFF`.
- `WITH_C_API` must be `ON`, to build the C-based inference library for Android.
- `WITH_SWIG_PY` must be `OFF` because the Android platform doesn't support SWIG-based API.
Some Android-specific arguments:
- `ANDROID_STANDALONE_TOOLCHAIN`: the absolute path of the Android standalone toolchain, or the path relative to the CMake build directory. PaddlePaddle's CMake extensions would derive the cross-compiler, sysroot and Android API level from this argument.
- `ANDROID_TOOLCHAIN`: could be `gcc` or `clang`. The default value is `clang`.
- For CMake >= 3.7, it should anyway be `clang`. For older versions, it could be `gcc`.
- Android's official `clang` requires `glibc` >= 2.15.
- `ANDROID_ABI`: could be `armeabi-v7a` or `arm64-v8a`. The default value is `armeabi-v7a`.
- `ANDROID_NATIVE_API_LEVEL`: could be derived from the value of `ANDROID_STANDALONE_TOOLCHAIN`.
- `ANROID_ARM_MODE`:
- could be `ON` or `OFF`, and defaults to `ON`, when `ANDROID_ABI=armeabi-v7a`;
- no need to specify when `ANDROID_ABI=arm64-v8a`.
- `ANDROID_ARM_NEON`: indicates if to use NEON instructions.
- could be `ON` or `OFF`, and defaults to `ON`, when `ANDROID_ABI=armeabi-v7a`;
- no need to specify when `ANDROID_ABI=arm64-v8a`.
Other useful arguments:
- `USE_EIGEN_FOR_BLAS`: indicates if using Eigen. Could be `ON` or `OFF`, defaults to `OFF`.
- `HOST_C/CXX_COMPILER`: specifies the host compiler, which is used to build the host-specific protoc and target-specific OpenBLAS. It defaults to the value of the environment variable `CC`, or `cc`.
Some frequent configurations for your reference:
```bash
cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=your/path/to/arm_standalone_toolchain \
-DANDROID_ABI=armeabi-v7a \
-DANDROID_ARM_NEON=ON \
-DANDROID_ARM_MODE=ON \
-DUSE_EIGEN_FOR_BLAS=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
..
```
```
cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=your/path/to/arm64_standalone_toolchain \
-DANDROID_ABI=arm64-v8a \
-DUSE_EIGEN_FOR_BLAS=OFF \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
..
```
There are some other arguments you might want to configure.
- `CMAKE_BUILD_TYPE=MinSizeRel` minimizes the size of library.
- `CMAKE_BUILD_TYPE-Release` optimizes the runtime performance.
Our own tip for performance optimization to use clang and Eigen or OpenBLAS:
- `CMAKE_BUILD_TYPE=Release`
- `ANDROID_TOOLCHAIN=clang`
- `USE_EIGEN_BLAS=ON` for `armeabi-v7a`, or `USE_EIGEN_FOR_BLAS=OFF` for `arm64-v8a`.
### Build and Install
After running `cmake`, we can run `make; make install` to build and install.
Before building, you might want to remove the `third_party` and `build` directories including pre-built libraries for other architectures.
After buildingin the directory `CMAKE_INSTALL_PREFIX`, you will find three sub-directories:
- `include`: the header file of the inference library,
- `lib`: the inference library built for various Android ABIs,
- `third_party`: dependent third-party libraries built for Android.

@ -1,7 +1,7 @@
# 构建Android平台上的PaddlePaddle库
用户可通过如下两种方式交叉编译Android平台上适用的PaddlePaddle库
- 基于Docker容器的编译方式
- 基于Docker容器的编译方式
- 基于Linux交叉编译环境的编译方式
## 基于Docker容器的编译方式
@ -26,14 +26,14 @@ Android的Docker开发镜像向用户提供两个可配置的参数
|`ANDROID_API` |`>= 21` | `21` |
- 编译`armeabi-v7a``Android API 21`的PaddlePaddle库
```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_API=21" username/paddle-android:dev
```
```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_API=21" username/paddle-android:dev
```
- 编译`arm64-v8a``Android API 21`的PaddlePaddle库
```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=arm64-v8a" -e "ANDROID_API=21" username/paddle-android:dev
```
- 编译`arm64-v8a``Android API 21`的PaddlePaddle库
```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=arm64-v8a" -e "ANDROID_API=21" username/paddle-android:dev
```
执行上述`docker run`命令时,容器默认执行[paddle/scripts/docker/build_android.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build_android.sh)脚本。该脚本中记录了交叉编译Android版PaddlePaddle库常用的CMake配置并且会根据`ANDROID_ABI`和`ANDROID_API`自动构建独立工具链、进行编译和安装。由于arm64架构要求Android API不小于21。因此当`ANDROID_ABI=arm64-v8a``ANDROID_API<21`Docker使`Android API 21`****DockerPaddlePaddleC-API`$PWD/install_android``$PWD/install_android/third_party`
@ -82,16 +82,16 @@ CMake系统对交叉编译提供了支持[cmake-toolchains](https://cmake.org/cm
Android平台可选配置参数
- `ANDROID_STANDALONE_TOOLCHAIN`独立工具链所在的绝对路径或者相对于构建目录的相对路径。PaddlePaddle的CMake系统将根据该值自动推导和设置需要使用的交叉编译器、sysroot、以及Android API级别否则用户需要在cmake时手动设置这些值。无默认值。
- `ANDROID_TOOLCHAIN`,目标工具链。可设置`gcc/clang`,默认值为`clang`。
- CMake 3.7以上,将会始终使用`clang`工具链CMake 3.7以下,可设置`ANDROID_TOOLCHAIN=gcc`以使用`gcc`工具链。
- `ANDROID_TOOLCHAIN`,目标工具链。可设置`gcc/clang`,默认值为`clang`。
- CMake 3.7以上,将会始终使用`clang`工具链CMake 3.7以下,可设置`ANDROID_TOOLCHAIN=gcc`以使用`gcc`工具链。
- Android官方提供的`clang`编译器要求系统支持`GLIBC 2.15`以上。
- `ANDROID_ABI`目标架构ABI。目前支持`armeabi-v7a`和`arm64-v8a`,默认值为`armeabi-v7a`。
- `ANDROID_NATIVE_API_LEVEL`工具链的Android API级别。若没有显式设置PaddlePaddle将根据`ANDROID_STANDALONE_TOOLCHAIN`的值自动推导得到。
- `ANROID_ARM_MODE`是否使用ARM模式。
- `ANDROID_ABI=armeabi-v7a`时,可设置`ON/OFF`,默认值为`ON`
- `ANROID_ARM_MODE`是否使用ARM模式。
- `ANDROID_ABI=armeabi-v7a`时,可设置`ON/OFF`,默认值为`ON`
- `ANDROID_ABI=arm64-v8a`时,不需要设置。
- `ANDROID_ARM_NEON`是否使用NEON指令。
- `ANDROID_ABI=armeabi-v7a`时,可设置`ON/OFF`,默认值为`ON`
- `ANDROID_ARM_NEON`是否使用NEON指令。
- `ANDROID_ABI=armeabi-v7a`时,可设置`ON/OFF`,默认值为`ON`
- `ANDROID_ABI=arm64-v8a`时,不需要设置。
其他配置参数:
@ -119,7 +119,7 @@ cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=your/path/to/arm64_standalone_toolchain \
-DANDROID_ABI=arm64-v8a \
-DUSE_EIGEN_FOR_BLAS=OFF \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
..
@ -128,8 +128,8 @@ cmake -DCMAKE_SYSTEM_NAME=Android \
用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE`为`Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS_MINSIZEREL/RELEASE`来影响PaddlePaddle的编译过程。
**性能TIPS**为了达到最快的计算速度在CMake参数配置上有以下建议
- 设置`CMAKE_BUILD_TYPE`为`Release`
- 使用`clang`编译工具链
- 设置`CMAKE_BUILD_TYPE`为`Release`
- 使用`clang`编译工具链
- `armeabi-v7a`时,设置`USE_EIGEN_BLAS=ON`使用Eigen进行矩阵计算`arm64-v8a`时,设置`USE_EIGEN_FOR_BLAS=OFF`使用OpenBLAS进行矩阵计算
### 编译和安装

@ -0,0 +1,99 @@
# 构建iOS平台上的PaddlePaddle库
交叉编译iOS平台上适用的PaddlePaddle库需要在MacOS系统上进行。本文的将介绍在MacOS上从源码交叉编译iOS平台上适用的PaddlePaddle库。
## 准备交叉编译环境
Apple官方为iOS开发提供了完整的交叉编译工具和集成开发环境用户从App Store下载安装Xcode即可。也可自行前往官网下载[Xcode](https://developer.apple.com/cn/xcode/)。安装完成之后,可在命令行执行`xcodebuild -version`,判断是否安装成功。
```bash
$ xcodebuild -version
Xcode 9.0
Build version 9A235
```
## 配置交叉编译参数
PaddlePaddle为交叉编译提供了工具链配置文档[cmake/cross_compiling/ios.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/ios.cmake),以提供一些默认的编译器和编译参数配置。
交叉编译iOS版本的PaddlePaddle库时有一些必须配置的参数
- `CMAKE_SYSTEM_NAME`CMake编译的目标平台必须设置为`iOS`。在设置`CMAKE_SYSTEM_NAME=iOS`后PaddlePaddle的CMake系统会自动编译所有的第三方依赖库并且强制设置一些PaddlePaddle参数的值`WITH_C_API=ON`、`WITH_GPU=OFF`、`WITH_AVX=OFF`、`WITH_PYTHON=OFF`、`WITH_RDMA=OFF`)。
- `WITH_C_API`是否编译C-API预测库必须设置为ON。在iOS平台上只支持使用C-API来预测。
- `WITH_SWIG_PY`必须设置为ON。在iOS平台上不支持通过swig调用来训练或者预测。
iOS平台可选配置参数
- `IOS_PLATFORM`,可设置为`OS/SIMULATOR`,默认值为`OS`。
- `OS`,构建目标为`arm`架构的iPhone或者iPad等物理设备。
- `SIMULATOR`,构建目标为`x86`架构的模拟器平台。
- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示:
| IOS_PLATFORM | IOS_ARCH |
|--------------|----------------------|
| OS | armv7, armv7s, arm64 (默认) |
| SIMULATOR | i386, x86_64 (默认) |
- `IOS_DEPLOYMENT_TARGET`最小的iOS部署版本默认值为`7.0`。
- `IOS_ENABLE_BITCODE`,是否使能[Bitcode](https://developer.apple.com/library/content/documentation/IDEs/Conceptual/AppDistributionGuide/AppThinning/AppThinning.html#//apple_ref/doc/uid/TP40012582-CH35-SW3),可设置`ON/OFF`,默认值为`ON`。
- `IOS_USE_VECLIB_FOR_BLAS`,是否使用[vecLib](https://developer.apple.com/documentation/accelerate/veclib)框架进行BLAS矩阵计算可设置`ON/OFF`,默认值为`OFF`。
- `IOS_DEVELOPMENT_ROOT``Developer`目录,可显式指定为`/path/to/platform/Developer`。若未显式指定PaddlePaddle将会根据`IOS_PLATFORM`自动选择`Xcode`对应`platform`的`Developer`目录。
- `IOS_SDK_ROOT`,所使用`SDK`的根目录,可显式指定为`/path/to/platform/Developer/SDKs/SDK`。若未显式指定PaddlePaddle将会自动选择`IOS_DEVELOPMENT_ROOT`目录下最新的`SDK`版本。
其他配置参数:
- `USE_EIGEN_FOR_BLAS`是否使用Eigen库进行矩阵计算在`IOS_USE_VECLIB_FOR_BLAS=OFF`时有效。可设置`ON/OFF`,默认值为`OFF`。
- `HOST_C/CXX_COMPILER`宿主机的C/C++编译器。默认值为环境变量`CC/CXX`的值;若环境变量`CC/CXX`未设置,则使用`cc/c++`编译器。
常用的cmake配置如下
```bash
cmake -DCMAKE_SYSTEM_NAME=iOS \
-DIOS_PLATFORM=OS \
-DIOS_ARCH="arm64" \
-DIOS_ENABLE_BITCODE=ON \
-DIOS_USE_VECLIB_FOR_BLAS=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_C_API=ON \
-DWITH_TESTING=OFF \
-DWITH_SWIG_PY=OFF \
..
```
```bash
cmake -DCMAKE_SYSTEM_NAME=iOS \
-DIOS_PLATFORM=SIMULATOR \
-DIOS_ARCH="x86_64" \
-DIOS_USE_VECLIB_FOR_BLAS=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_C_API=ON \
-DWITH_TESTING=OFF \
-DWITH_SWIG_PY=OFF \
..
```
用户还可根据自己的需求设置其他编译参数。比如希望最小化生成库的大小,可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`;若希望得到最快的执行速度,则可设置`CMAKE_BUILD_TYPE`为`Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS`来影响PaddlePaddle的编译过程。
**性能TIPS**为了达到最快的计算速度在CMake参数配置上有以下建议
- 设置`CMAKE_BUILD_TYPE`为`Release`
- 设置`IOS_USE_VECLIB_FOR_BLAS=ON`,调用`vecLib`框架提供的BLAS函数进行矩阵计算。
## 编译和安装
CMake配置完成后执行以下命令PaddlePaddle将自动下载和编译所有第三方依赖库、编译和安装PaddlePaddle预测库。
```
$ make
$ make install
```
注意如果你曾在源码目录下编译过其他平台的PaddlePaddle库请先使用`rm -rf`命令删除`third_party`目录和`build`目录以确保所有的第三方依赖库和PaddlePaddle代码都是针对新的CMake配置重新编译的。
执行完安装命令后,`your/path/to/install`目录中会包含以下内容:
- `include`目录其中包含所有C-API的头文件
- `lib`目录其中包含PaddlePaddle的C-API静态库
- `third_party`目录,其中包含所依赖的所有第三方库
注意不同架构的PaddlePaddle库建议安装到不同的目录下然后使用`lipo`工具将多个静态库合并成一个支持多个架构的fat库。
自此PaddlePaddle库已经安装完成用户可将合成的fat库用于深度学习相关的iOS App中调用方法见C-API文档。

@ -1,39 +1,36 @@
# 构建Raspberry Pi平台上的PaddlePaddle库
对于Rasspberry Pi系统用户可通过ssh等方式登录到Raspberry Pi系统上按照[源码编译PaddlePaddle](http://www.paddlepaddle.org/doc_cn/getstarted/build_and_install/cmake/build_from_source_cn.html)相关文档所述直接编译Raspberry Pi平台上适用的PaddlePaddle库。
通常有两个方法来构建基于 Rasspberry Pi 的版本:
用户也可以在自己熟悉的开发平台上通过交叉编译的方式来编译。这篇文档将以Linux x86-64平台为例介绍交叉编译Raspberry Pi平台上适用的PaddlePaddle的方法和步骤
1. 通过ssh等方式登录到Raspberry Pi系统上来构建。所需的开发工具和第三方库可以参考 [`/Dockerfile`](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile)
## 准备交叉编译环境
1. 另一个方法是交叉编译。这篇文档介绍在 Linux/x64 上交叉编译Raspberry Pi平台上适用的PaddlePaddle的方法和步骤。
从源码交叉编译PaddlePaddle用户需要提前准备好交叉编译环境。用户可自行前往[github](https://github.com/raspberrypi/tools)下载Raspberry Pi平台使用的C/C++交叉编译工具链,也可通过以下命令获取:
## 安装交叉编译器
克隆下面 Github repo
```bash
git clone https://github.com/raspberrypi/tools.git
```
该github仓库中包含若干个预编译好的、针对不同平台的编译工具。宿主机是Linux x86-64环境则需选用`arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64`下的作为编译工具所使用的编译器为arm-linux-gnueabihf-gcc 4.8.3。
注意该编译工具链需要系统glibc支持2.14以上。
即可在 `./tools/tree/master/arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64` 目录里找到交叉编译器 arm-linux-gnueabihf-gcc 4.8.3。运行该编译工具链需要一台 Linux x64 机器上以及 2.14版本以上的 glibc。
## 配置交叉编译参数
CMake系统对交叉编译提供了支持[cmake-toolchains](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling)。为了简化cmake配置PaddlePaddle为交叉编译提供了工具链配置文档[cmake/cross_compiling/raspberry_pi.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/raspberry_pi.cmake),以提供一些默认的编译器和编译参数相关配置
CMake[支持交叉编译](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling)。PaddlePaddle for Raspberry Pi的配置信息在[cmake/cross_compiling/raspberry_pi.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/raspberry_pi.cmake)。
交叉编译Raspberry Pi版本PaddlePaddle库时有一些必须配置的参数
- `CMAKE_SYSTEM_NAME`CMake编译的目标平台必须配置为`RPi`。在设置`CMAKE_SYSTEM_NAME=RPi`后PaddlePaddle的CMake系统才认为在是在交叉编译Raspberry Pi系统的版本并自动编译宿主机版protoc可执行文件、目标机版protobuf库、以及目标机版OpenBLAS库。
Raspberry Pi平台可选配置参数
- `CMAKE_SYSTEM_NAME`CMake编译的目标平台必须配置为`RPi`。在设置`CMAKE_SYSTEM_NAME=RPi`后PaddlePaddle的CMake系统才认为在是在交叉编译Raspberry Pi系统的版本并自动编译宿主机版protoc可执行文件、目标机版protobuf库、以及目标机版OpenBLAS库。
- `RPI_TOOLCHAIN`编译工具链所在的绝对路径或者相对于构建目录的相对路径。PaddlePaddle的CMake系统将根据该值自动设置需要使用的交叉编译器否则用户需要在cmake时手动设置这些值。无默认值。
- `RPI_ARM_NEON`是否使用NEON指令。目前必须设置成`ON`,默认值为`ON`。
- `RPI_TOOLCHAIN`编译工具链所在的绝对路径或者相对于构建目录的相对路径。PaddlePaddle的CMake系统将根据该值自动设置需要使用的交叉编译器否则用户需要在cmake时手动设置这些值。无默认值。
其他配置参数:
- `RPI_ARM_NEON`是否使用NEON指令。目前必须设置成`ON`,默认值为`ON`。
- `HOST_C/CXX_COMPILER`宿主机的C/C++编译器。在编译宿主机版protoc可执行文件和目标机版OpenBLAS库时需要用到。默认设置成环境变量`CC`的值;若环境变量`CC`没有设置,则设置成`cc`编译器。
cmake参数如下
一个常用的CMake配置如下
```
cmake -DCMAKE_SYSTEM_NAME=RPi \
@ -47,7 +44,9 @@ cmake -DCMAKE_SYSTEM_NAME=RPi \
..
```
用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE`为`Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS_MINSIZEREL/RELEASE`来影响PaddlePaddle的编译过程。
其中`WITH_C_API=ON`表示需要构建推理库。
用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE`为`Release`。
## 编译和安装
@ -60,6 +59,4 @@ make install
注意如果你曾经在源码目录下编译过其他平台的PaddlePaddle库请先使用`rm -rf`命令删除`third_party`目录和`build`目录以确保所有的第三方依赖库和PaddlePaddle代码都是针对新的CMake配置重新编译的。
执行完安装命令后由于上一步cmake配置中`WITH_C_API`设置为`ON``your/path/to/install`目录中会包含`include`和`lib`目录,其中`include`中包含C-API的头文件`lib`中包含一个Raspberry Pi版本的库。
更多的编译配置见[源码编译PaddlePaddle](http://www.paddlepaddle.org/doc_cn/getstarted/build_and_install/cmake/build_from_source_cn.html)相关文档。
执行完安装命令后,`your/path/to/install`目录中会包含`include`和`lib`目录,其中`include`中包含C-API的头文件`lib`中包含一个Raspberry Pi版本的库。

@ -0,0 +1,62 @@
# Build PaddlePaddle for Raspberry Pi
You may use any of the following two approaches to build the inference library of PaddlePaddle for Raspberry Pi:
1. Build using SSH: Log in to a Raspberry Pi using SSH and build the library. The required development tools and third-party dependencies are listed in here: [`/Dockerfile`](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile).
1. Cross-compile: We talk about how to cross-compile PaddlePaddle for Raspberry Pi on a Linux/x64 machine, in more detail in this article.
## The Cross-Compiling Toolchain
Step 1. Clone the Github repo by running the following command.
```bash
git clone https://github.com/raspberrypi/tools.git
```
Step 2. Use the pre-built cross-compiler found in `./tools/tree/master/arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64`. To run it on a Linux computer, glibc version >= 2.14 is needed.
## CMake Arguments
CMake supports [cross-compiling](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling). All CMake configuration arguments required for the cross-compilation for Raspberry Pi can be found in [`cmake/cross_compiling/raspberry_pi.cmake`](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/raspberry_pi.cmake).
Some important arguments that need to be set:
- `CMAKE_SYSTEM_NAME`: The target platform. Must be `RPi`.
- `RPI_TOOLCHAIN`: The absolute path of the cross-compiling toolchain.
- `RPI_ARM_NEON`: Use ARM NEON Intrinsics. This is a required argument and set default to `ON`.
- `HOST_C/CXX_COMPILER`: The C/C++ compiler for the host. It is used to build building tools running on the host, for example, protoc.
A commonly-used CMake configuration is as follows:
```
cmake -DCMAKE_SYSTEM_NAME=RPi \
-DRPI_TOOLCHAIN=your/path/to/arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64 \
-DRPI_ARM_NEON=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_GPU=OFF \
-DWITH_C_API=ON \
-DWITH_PYTHON=OFF \
-DWITH_SWIG_PY=OFF \
..
```
To build the inference library, please set the argument WITH\_C\_API to ON: `WITH_C_API=ON`.
You can add more arguments. For example, to minimize the size of the generated inference library, you may use `CMAKE_BUILD_TYPE=MinSizeRel`. For performance optimization, you may use `CMAKE_BUILD_TYPE=Release`.
## Build and Install
The following commands build the inference library of PaddlePaddle for Raspberry Pi and third-party dependencies.
```bash
make
make install
```
The intermediate files will be stored in `build`. Third-party libraries will be located in `build/third_party`. If you have already built it for other platforms like Android or iOS, you may want to clear these directories by running the command: `rm -rf build`.
The infernece library will be in `your/path/to/install/lib`, with related header files in `your/path/to/install/include`.

@ -1,219 +0,0 @@
# Contribute Code
We sincerely appreciate your contributions. You can use fork and pull request
workflow to merge your code.
## Code Requirements
- Your code comments must be fully documented by
[Doxygen](http://www.stack.nl/~dimitri/doxygen/) style.
- Make sure the compiler option `WITH_STYLE_CHECK` is on and the compiler
passes the code style check.
- All code must have unit test.
- Pass all unit tests.
The following tutorial guides you into submitting your contibution.
## [Creating a Fork](https://help.github.com/articles/fork-a-repo/)
Just head over to the GitHub page and click the "Fork" button.
It's just that simple.
## Clone
Clone remote repository.
```bash
➜ git clone https://github.com/USERNAME/Paddle
➜ cd Paddle
```
## Create a local branch
Paddle is currently using [Git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/).
All feature and bug fix development work should be done on a new branch, generally create new branch from `develop` branch .
```bash
➜ git checkout -b my-cool-stuff
```
Before the checkout, you need to keep the current branch directory clean, otherwise the untracked file will be brought to the new branch, which can be inspected by `git status`.
## Using `pre-commit` hook
Paddle developers use [pre-commit](http://pre-commit.com/) tool to manage git
pre-commit hooks. It can help us format source codes (cpp, python), check some
basic thing before commit (only one EOL for each file, do not add a huge file
in git). `pre-commit` tests is a part of unit tests in Travis-CI now, every
PR doesn't fit hook can not be merged into Paddle.
To use [pre-commit](http://pre-commit.com/), you should install it by
`pip install pre-commit`, and currently, Paddle uses `clang-format` to format
c/cpp sources. Please make sure clang-format 3.8+ installed.
Install and run it as follow:
```bash
➜ pip install pre-commit
➜ pre-commit install
```
When you commit your code, the pre-commit hook will check the local code if there is
anything not suitable to commit, and so on.
## Start to develop
In this tutorial, I delete a line in README.md and created a new file.
We can use `git status` to inspect the changes of current directory, `git diff` to see difference.
```bash
➜ git status
On branch test
Changes not staged for commit:
(use "git add <file>..." to update what will be committed)
(use "git checkout -- <file>..." to discard changes in working directory)
modified: README.md
Untracked files:
(use "git add <file>..." to include in what will be committed)
test
no changes added to commit (use "git add" and/or "git commit -a")
```
## Build and Test
We package PaddlePaddle's compile environment into a Docker image, called the develop image named `paddle:dev`, it contains all compiling tools that PaddlePaddle needs.
If you want to build the develop image, just run:
```bash
➜ docker build -t paddle:dev .
```
Then we can use the develop image to build PaddlePaddle source. For example:
```bash
➜ docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" paddle:dev
```
The above command will compile PaddlePaddle and create a Dockerfile for building production image. All the generated files are in the build directory. "WITH_GPU" controls if the generated production image supports GPU. "WITH_AVX" controls if the generated production image supports AVX. "WITH_TEST" controls if the unit test will be generated.
Then we can generate the production image by copying the compiled PaddlePaddle program into the image by
```bash
➜ docker build -t paddle:prod -f build/Dockerfile .
```
Run unit test finally:
```bash
➜ docker run -it -v $(pwd):/paddle paddle:dev bash -c "cd /paddle/build && ctest"
```
For more details, you can read [this doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_en.rst).
## Commit
Next we cancel the changes to the README.md file and then commit our changes by following command lines:
```bash
➜ git checkout -- README.md
➜ git status
On branch test
Untracked files:
(use "git add <file>..." to include in what will be committed)
test
nothing added to commit but untracked files present (use "git add" to track)
➜ git add test
```
We should write a description of each commit by `git commit` to allow others to know
the changes in these files.
```bash
➜ git commit
CRLF end-lines remover...............................(no files to check)Skipped
yapf.................................................(no files to check)Skipped
Check for added large files..............................................Passed
Check for merge conflicts................................................Passed
Check for broken symlinks................................................Passed
Detect Private Key...................................(no files to check)Skipped
Fix End of Files.....................................(no files to check)Skipped
clang-formater.......................................(no files to check)Skipped
[my-cool-stuff c703c041] add test file
1 file changed, 0 insertions(+), 0 deletions(-)
create mode 100644 233
```
## Keeping Fork Up to Date
Before pull your request, you should sync your code from the latest PaddlePaddle.
To do this, you'll need to add a remote at first:
```bash
➜ git remote add upstream https://github.com/PaddlePaddle/Paddle
➜ git remote
origin
upstream
```
Update your fork with the latest upstream changes:
```bash
➜ git fetch upstream
➜ git pull upstream develop
```
Now, your local master branch is up-to-date with everything modified upstream.
## Push to GitHub
```bash
# push to your repository in Github
➜ git push origin my-cool-stuff
```
## Create an issue and a Pull Request
Create an Issue to describe the problem and record its number.
Go to the page for your fork on GitHub, select your development branch,
and click the `New pull request`.
<img width="295" alt="screen shot 2017-04-26 at 9 09 28 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436054/a6d98c66-2ac4-11e7-9cb1-18dd13150230.png">
Then select the target branch:
<img width="750" alt="screen shot 2017-04-26 at 9 11 52 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436139/f83b1e6c-2ac4-11e7-8c0e-add499023c46.png">
We can add `resolve #Issue number` in PR description to close the issue automatically after the PR is merge. More details in <https://help.github.com/articles/closing-issues-via-commit-messages/>.
Then wait for review, if there need to modify, refer to the above steps to update the corresponding origin branch.
## Delete origin branch
After the PR is merge into the main repository, we can delete the remote branch on the PR page.
<img width="775" alt="screen shot 2017-04-26 at 9 18 24 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436457/e4cdd472-2ac5-11e7-9272-badc76c4a23e.png">
Or just run:
```bash
➜ git push origin :my-cool-stuff
```
## Delete local branch
Finally, we delete local branch:
```bash
➜ git checkout develop
# delete my-cool-stuff branch
➜ git branch -D my-cool-stuff
```

@ -0,0 +1 @@
../../../CONTRIBUTING.md

@ -21,7 +21,6 @@
dev/build_cn.rst
dev/write_docs_cn.rst
dev/contribute_to_paddle_cn.md
模型配置
--------

@ -19,7 +19,7 @@
* [启动集群作业](#启动集群作业-1)
* [在Kubernetes集群中提交训练作业](#在kubernetes集群中提交训练作业)
# 概述
## 概述
本文将介绍如何使用PaddlePaddle在不同的集群框架下完成分布式训练。分布式训练架构如下图所示
<img src="https://user-images.githubusercontent.com/13348433/31772175-5f419eca-b511-11e7-9db7-5231fe3d9ccb.png" width="500">
@ -32,7 +32,7 @@
在使用同步SGD训练神经网络时PaddlePaddle使用同步屏障barrier使梯度的提交和参数的更新按照顺序方式执行。在异步SGD中则并不会等待所有trainer提交梯度才更新参数这样极大地提高了计算的并行性参数服务器之间不相互依赖并行地接收梯度和更新参数参数服务器也不会等待计算节点全部都提交梯度之后才开始下一步计算节点之间也不会相互依赖并行地执行模型的训练。可以看出虽然异步SGD方式会提高参数更新并行度, 但是并不能保证参数同步更新在任意时间某一台参数服务器上保存的参数可能比另一台要更新与同步SGD相比梯度会有噪声。
# 环境准备
## 环境准备
1. 准备您的计算集群。计算集群通常由一组几台到几千台规模的Linux服务器组成。服务器之间可以通过局域网LAN联通每台服务器具有集群中唯一的IP地址或者可被DNS解析的主机名。集群中的每台计算机通常被成为一个“节点”。
1. 我们需要在集群的所有节点上安装 PaddlePaddle。 如果要启用GPU还需要在节点上安装对应的GPU驱动以及CUDA。PaddlePaddle的安装可以参考[build_and_install](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/getstarted/build_and_install)的多种安装方式。我们推荐使用[Docker](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_cn.rst)安装方式来快速安装PaddlePaddle。
@ -51,8 +51,8 @@ PaddlePaddle 0.10.0, compiled with
下面以`doc/howto/usage/cluster/src/word2vec`中的代码作为实例介绍使用PaddlePaddle v2 API完成分布式训练。
# 启动参数说明
## 启动参数服务器
## 启动参数说明
### 启动参数服务器
执行以下的命令启动一个参数服务器并等待和计算节点的数据交互
```bash
$ paddle pserver --port=7164 --ports_num=1 --ports_num_for_sparse=1 --num_gradient_servers=1
@ -70,7 +70,7 @@ $ stdbuf -oL /usr/bin/nohup paddle pserver --port=7164 --ports_num=1 --ports_num
| ports_num_for_sparse | 必选 | 1 | 用于稀疏类型参数通信的端口个数 |
| num_gradient_servers | 必选 | 1 | 当前训练任务pserver总数 |
## 启动计算节点
### 启动计算节点
执行以下命令启动使用python编写的trainer程序文件名为任意文件名如train.py
```bash
$ python train.py
@ -117,7 +117,7 @@ paddle.init(
| pservers | 必选 | 127.0.0.1 | 当前训练任务启动的pserver的IP列表多个IP使用“,”隔开 |
## 准备数据集
### 准备数据集
参考样例数据准备脚本[prepare.py](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/howto/usage/cluster/src/word2vec/prepare.py)准备训练数据和验证数据集我们使用paddle.dataset.imikolov数据集并根据分布式训练并发数trainer节点个数在`prepare.py`开头部分指定`SPLIT_COUNT`将数据切分成多份。
@ -149,7 +149,7 @@ test.txt-00002
对于不同的训练任务,训练数据格式和训练程序的`reader()`会大不相同,所以开发者需要根据自己训练任务的实际场景完成训练数据的分割和`reader()`的编写。
## 准备训练程序
### 准备训练程序
我们会对每个训练任务都会在每个节点上创建一个工作空间workspace其中包含了用户的训练程序、程序依赖、挂载或下载的训练数据分片。
@ -184,7 +184,7 @@ test.txt-00002
- `train_data_dir`:包含训练数据的目录,可以是从分布式存储挂载过来的,也可以是在任务启动前下载到本地的。
- `test_data_dir`:包含测试数据集的目录。
# 使用分布式计算平台或工具
## 使用分布式计算平台或工具
PaddlePaddle可以使用多种分布式计算平台构建分布式计算任务包括
- [Kubernetes](http://kubernetes.io) Google开源的容器集群的调度框架支持大规模集群生产环境的完整集群方案。
@ -195,12 +195,12 @@ PaddlePaddle可以使用多种分布式计算平台构建分布式计算任务
在使用分布式计算平台进行训练时任务被调度在集群中时分布式计算平台通常会通过API或者环境变量提供任务运行需要的参数比如节点的ID、IP和任务节点个数等。
## 使用Fabric启动集群作业
### 使用Fabric启动集群作业
### 准备一个Linux集群
#### 准备一个Linux集群
可以在`paddle/scripts/cluster_train_v2/fabric/docker_cluster`目录下,执行`kubectl -f ssh_servers.yaml`启动一个测试集群,并使用`kubectl get po -o wide`获得这些节点的IP地址。
### 启动集群作业
#### 启动集群作业
`paddle.py` 提供了自动化脚本来启动不同节点中的所有 PaddlePaddle 集群进程。默认情况下,所有命令行选项可以设置为 `paddle.py` 命令选项并且 `paddle.py` 将透明、自动地将这些选项应用到 PaddlePaddle 底层进程。
@ -216,10 +216,10 @@ sh run.sh
集群作业将会在几秒后启动。
### 终止集群作业
#### 终止集群作业
`paddle.py`能获取`Ctrl + C` SIGINT 信号来自动终止它启动的所有进程。只需中断 `paddle.py` 任务来终止集群作业。如果程序崩溃你也可以手动终止。
### 检查集群训练结果
#### 检查集群训练结果
详细信息请检查 $workspace/log 里的日志,每一个节点都有相同的日志结构。
`paddle_trainer.INFO`
@ -234,13 +234,13 @@ sh run.sh
`train.log`
提供训练过程的 stderr 和 stdout。训练失败时可以检查错误日志。
### 检查模型输出
#### 检查模型输出
运行完成后,模型文件将被写入节点 0 的 `output` 目录中。
工作空间中的 `nodefile` 表示当前集群作业的节点 ID。
## 在OpenMPI集群中提交训练作业
### 在OpenMPI集群中提交训练作业
### 准备OpenMPI集群
#### 准备OpenMPI集群
执行下面的命令以启动3个节点的OpenMPI集群和一个"head"节点:
@ -252,7 +252,7 @@ kubectl create -f mpi-nodes.yaml
然后可以从head节点ssh无密码登录到OpenMPI的每个节点上。
### 启动集群作业
#### 启动集群作业
您可以按照下面的步骤在OpenMPI集群中提交paddle训练任务
@ -280,6 +280,6 @@ scp train.txt-00002 test.txt-00002 [node3IP]:/home/tutorial
mpirun -hostfile machines -n 3 /home/tutorial/start_mpi_train.sh
```
## 在Kubernetes集群中提交训练作业
### 在Kubernetes集群中提交训练作业
此部分的使用方法可以参考[here](../k8s/k8s_distributed_cn.md)。

@ -19,7 +19,7 @@
* [Launching Cluster Job](#launching-cluster-job-1)
* [Cluster Training Using Kubernetes](#cluster-training-using-kubernetes)
# Introduction
## Introduction
In this article, we'll explain how to run distributed training jobs with PaddlePaddle on different types of clusters. The diagram below shows the main architecture of a distributed trainning job:
@ -33,7 +33,7 @@ PaddlePaddle can support both synchronize stochastic gradient descent (SGD) and
When training with synchronize SGD, PaddlePaddle uses an internal "synchronize barrier" which makes gradients update and parameter download in strict order. On the other hand, asynchronous SGD won't wait for all trainers to finish upload at a single step, this will increase the parallelism of distributed training: parameter servers do not depend on each other, they'll do parameter optimization concurrently. Parameter servers will not wait for trainers, so trainers will also do their work concurrently. But asynchronous SGD will introduce more randomness and noises in the gradient.
# Preparations
## Preparations
1. Prepare your computer cluster. It's normally a bunch of Linux servers connected by LAN. Each server will be assigned a unique IP address. The computers in the cluster can be called "nodes".
2. Install PaddlePaddle on every node. If you are going to take advantage of GPU cards, you'll also need to install proper driver and CUDA libraries. To install PaddlePaddle please read [this build and install](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/getstarted/build_and_install) document. We strongly recommend using [Docker installation](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_en.rst).
@ -52,9 +52,9 @@ PaddlePaddle 0.10.0rc, compiled with
We'll take `doc/howto/usage/cluster/src/word2vec` as an example to introduce distributed training using PaddlePaddle v2 API.
# Command-line arguments
## Command-line arguments
## Starting parameter server
### Starting parameter server
Type the below command to start a parameter server which will wait for trainers to connect:
@ -74,7 +74,7 @@ $ stdbuf -oL /usr/bin/nohup paddle pserver --port=7164 --ports_num=1 --ports_num
| ports_num_for_sparse | required | 1 | number of ports which serves sparse parameter update |
| num_gradient_servers | required | 1 | total number of gradient servers |
## Starting trainer
### Starting trainer
Type the command below to start the trainer(name the file whatever you want, like "train.py")
```bash
@ -122,7 +122,7 @@ paddle.init(
| trainer_id | required | 0 | ID for every trainer, start from 0 |
| pservers | required | 127.0.0.1 | list of IPs of parameter servers, separated by "," |
## Prepare Training Dataset
### Prepare Training Dataset
Here's some example code [prepare.py](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/howto/usage/cluster/src/word2vec/prepare.py), it will download public `imikolov` dataset and split it into multiple files according to job parallelism(trainers count). Modify `SPLIT_COUNT` at the begining of `prepare.py` to change the count of output files.
@ -155,7 +155,7 @@ When job started, every trainer needs to get it's own part of data. In some dist
Different training jobs may have different data format and `reader()` function, developers may need to write different data prepare scripts and `reader()` functions for their job.
## Prepare Training program
### Prepare Training program
We'll create a *workspace* directory on each node, storing your training program, dependencies, mounted or downloaded dataset directory.
@ -191,7 +191,7 @@ Your workspace may looks like:
- `train_data_dir`: containing training data. Mount from storage service or copy trainning data to here.
- `test_data_dir`: containing testing data.
# Use cluster platforms or cluster management tools
## Use cluster platforms or cluster management tools
PaddlePaddle supports running jobs on several platforms including:
- [Kubernetes](http://kubernetes.io) open-source system for automating deployment, scaling, and management of containerized applications from Google.
@ -202,13 +202,13 @@ We'll introduce cluster job management on these platforms. The examples can be f
These cluster platforms provide API or environment variables for training processes, when the job is dispatched to different nodes. Like node ID, IP or total number of nodes etc.
## Cluster Training Using Fabric
### Cluster Training Using Fabric
### Prepare a Linux cluster
#### Prepare a Linux cluster
Run `kubectl -f ssh_servers.yaml` under the directory: `paddle/scripts/cluster_train_v2/fabric/docker_cluster` will launch a demo cluster. Run `kubectl get po -o wide` to get IP addresses of these nodes.
### Launching Cluster Job
#### Launching Cluster Job
`paddle.py` provides automatical scripts to start all PaddlePaddle cluster processes in different nodes. By default, all command line options can be set as `paddle.py` command options and `paddle.py` will transparently and automatically set these options to PaddlePaddle lower level processes.
`paddle.py`provides two distinguished command option for easy job launching.
@ -224,10 +224,10 @@ sh run.sh
The cluster Job will start in several seconds.
### Kill Cluster Job
#### Kill Cluster Job
`paddle.py` can capture `Ctrl + C` SIGINT signal to automatically kill all processes launched by it. So just stop `paddle.py` to kill cluster job. You should manually kill the job if the program crashed.
### Check Cluster Training Result
#### Check Cluster Training Result
Check log in $workspace/log for details, each node owns same log structure.
`paddle_trainer.INFO`
@ -242,13 +242,13 @@ It provides stderr and stdout of parameter server process. Check error log if tr
`train.log`
It provides stderr and stdout of trainer process. Check error log if training crashes.
### Check Model Output
#### Check Model Output
After one pass finished, model files will be written in `output` directory in node 0.
`nodefile` in workspace indicates the node id of current cluster job.
## Cluster Training Using OpenMPI
### Cluster Training Using OpenMPI
### Prepare an OpenMPI cluster
#### Prepare an OpenMPI cluster
Run the following command to start a 3-node MPI cluster and one "head" node.
@ -260,7 +260,7 @@ kubectl create -f mpi-nodes.yaml
Then you can log in to every OpenMPI node using ssh without input any passwords.
### Launching Cluster Job
#### Launching Cluster Job
Follow the steps to launch a PaddlePaddle training job in OpenMPI cluster:\
@ -288,6 +288,6 @@ scp train.txt-00002 test.txt-00002 [node3IP]:/home/tutorial
mpirun -hostfile machines -n 3 /home/tutorial/start_mpi_train.sh
```
## Cluster Training Using Kubernetes
### Cluster Training Using Kubernetes
The details can be found [here](../k8s/k8s_cn.md)

1
go/.gitignore vendored

@ -1,2 +1,3 @@
vendor/
.glide/
proto/*.go

4
go/glide.lock generated

@ -1,5 +1,5 @@
hash: 51d9e2e46d7fd9173ff11ecada40f7b7728756be18d5e2f032535f66465e6e15
updated: 2017-10-24T15:04:09.987751592-07:00
hash: 107c058cf5c9163a75d40eef2273a793c36112683c25d72aa8288827fdde3a19
updated: 2017-10-30T03:46:19.137696069Z
imports:
- name: github.com/alecthomas/gometalinter
version: bae2f1293d092fd8167939d5108d1b025eaef9de

@ -30,3 +30,4 @@ import:
version: v2.13
- package: github.com/go-stack/stack
version: v1.6.0
- package: github.com/golang/protobuf

@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore

@ -13,5 +13,5 @@
# limitations under the License.
#
if(WITH_TESTING)
go_test(pserver_test DEPS paddle_go_optimizer)
go_test(pserver_test DEPS paddle_go_optimizer gen_proto_go)
endif()

@ -17,6 +17,7 @@ package pserver
import (
"bufio"
"bytes"
"encoding/binary"
"encoding/gob"
"encoding/json"
"errors"
@ -26,11 +27,15 @@ import (
"os"
"path"
"strconv"
"strings"
"sync"
"time"
"github.com/golang/protobuf/proto"
uuid "github.com/satori/go.uuid"
pb "github.com/PaddlePaddle/Paddle/go/proto"
log "github.com/inconshreveable/log15"
)
@ -65,6 +70,46 @@ type Parameter struct {
Content []byte
}
func float32ToString(b []byte) string {
f := make([]float32, len(b)/4)
buf := bytes.NewReader(b)
err := binary.Read(buf, binary.LittleEndian, &f)
if err != nil {
return ""
}
return fmt.Sprintf("%v", f)
}
func float32ByteToString(c []byte) string {
var a []byte
var b []byte
if len(c) <= 80 {
a = c
} else {
a = c[0:40]
b = c[len(c)-40:]
}
var s string
s = float32ToString(a)
if b == nil {
return s
}
s = strings.Replace(s, "]", "", -1) + "..." + strings.Replace(float32ToString(b), "[", "", -1)
return s
}
func (p Parameter) String() string {
if p.ElementType != Float32 {
return fmt.Sprintf("name:%v ElementType:%v",
p.Name, p.ElementType)
}
return float32ByteToString(p.Content)
}
// ParameterWithConfig contains the parameter and the configuration.
type ParameterWithConfig struct {
Param Parameter
@ -189,7 +234,9 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, _ *int) error
default:
}
// TODO(helin): parse parameter config
c := &pb.OptimizerConfig{}
proto.Unmarshal(paramWithConfigs.Config, c)
log.Debug(fmt.Sprintf("OptimizerConfig:%v", c))
s.mu.Lock()
defer s.mu.Unlock()
@ -239,7 +286,8 @@ func (s *Service) SendGrad(g Gradient, _ *int) error {
select {
case <-s.initialized:
default:
log.Warn("received gradient before initialization.", "name", g.Name, "size", len(g.Content), "type", g.ElementType)
log.Warn("received gradient before initialization.",
"name", g.Name, "size", len(g.Content), "type", g.ElementType)
return errors.New(Uninitialized)
}
@ -248,10 +296,14 @@ func (s *Service) SendGrad(g Gradient, _ *int) error {
o, ok := s.optMap[g.Name]
if !ok {
log.Warn("received gradient but can't find name.",
"name", g.Name, "size", len(g.Content), "type", g.ElementType)
return fmt.Errorf("parameter: %s does not exist", g.Name)
}
log.Info("received gradient from trainer, updating gradient.", "name", g.Name, "size", len(g.Content), "type", g.ElementType)
log.Debug(Parameter(g).String())
log.Info("received gradient from trainer, updating gradient.",
"name", g.Name, "size", len(g.Content), "type", g.ElementType)
return o.UpdateParameter(g)
}
@ -277,7 +329,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
parameter.Name = name
parameter.ElementType = opt.elementType
parameter.Content = opt.GetWeights()
log.Debug(parameter.String())
log.Info("sending parameter to the trainer", "name", parameter.Name, "size", len(parameter.Content), "type", parameter.ElementType)
return nil
}

@ -15,6 +15,7 @@
package pserver_test
import (
"fmt"
"io/ioutil"
"reflect"
"sync"
@ -178,3 +179,33 @@ func TestBlockUntilInitialized(t *testing.T) {
wg.Wait()
}
func TestGradientString(t *testing.T) {
g := pserver.Parameter{}
g.ElementType = pserver.Float32
g.Content = []byte{0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40, 0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40}
if g.String() != "[3.3702806e+12 2.142699 3.3702806e+12 2.142699]" {
t.Fatal("get float data error!")
}
g.Content = []byte{0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40,
0x18, 0x2d, 0x44, 0x54, 0xfb, 0x21, 0x09, 0x40}
if g.String() != "[3.3702806e+12 2.142699 3.3702806e+12 2.142699 3.3702806e+12 2.142699 3.3702806e+12 2.142699 3.3702806e+12 2.142699...3.3702806e+12 2.142699 3.3702806e+12 2.142699 3.3702806e+12 2.142699 3.3702806e+12 2.142699 3.3702806e+12 2.142699]" {
t.Fatal("get float data error!", g.String())
}
fmt.Println(g)
}

@ -300,4 +300,12 @@ extern void hl_matrix_col2Vol(real* dataDst,
real alpha,
real beta);
/**
* @brief Matrix col2Vol: Convert col matrix into 3D volume
* @param[out] out output int vector.
* @param[in] vec input float vector.
* @param[in] size size of the vector.
*/
extern void hl_vector_cast2int(int* out, real* vec, int size);
#endif /* HL_MATRIX_H_ */

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save