fix conflict with baidu/develop

9 years ago · 766a61c374
parent fcf177fc4b 7180b4246d
commit 766a61c374
92 changed files with 2877 additions and 789 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8)
 project(paddle CXX C)
 set(PADDLE_MAJOR_VERSION 0)
 set(PADDLE_MINOR_VERSION 8)
-set(PADDLE_PATCH_VERSION 0b2)
+set(PADDLE_PATCH_VERSION 0b3)
 set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
@ -135,6 +135,7 @@ endif()
 if(WITH_GLOG)
    add_definitions(-DPADDLE_USE_GLOG)
    include_directories(${LIBGLOG_INCLUDE_DIR})
 endif()
 if(WITH_GFLAGS)
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@ -21,12 +21,6 @@ function(safe_set_flag is_c src_list flag_name)
    endif()
    if(${safe_name})
        set(${src_list} "${${src_list}} ${flag_name}" PARENT_SCOPE)
        if(is_c)
          set(CUDA_NVCC_FLAGS
              --compiler-options;${flag_name}
              ${CUDA_NVCC_FLAGS}
              PARENT_SCOPE)
        endif()
    endif()
 endfunction()
@ -40,6 +34,20 @@ macro(safe_set_cxxflag src_list flag_name)
    safe_set_flag(OFF ${src_list} ${flag_name})
 endmacro()
 # Helper macro: forward a host-compiler flag to nvcc when the C compiler
 # supports it.
 #
 #   flag_name - a single compiler flag (for example -Wall).
 #
 # When the check succeeds, `--compiler-options;<flag_name>` is prepended to
 # CUDA_NVCC_FLAGS. Because this is a macro (no scope of its own), both
 # `safe_name` and CUDA_NVCC_FLAGS are set directly in the caller's scope.
 macro(safe_set_nvflag flag_name)
    # Derive a suffix for the result variable: '-' and '=' become '_'.
    string(REPLACE "-" "_" safe_name ${flag_name})
    string(REPLACE "=" "_" safe_name ${safe_name})
    # Probe the C compiler; the outcome lands in
    # C_COMPILER_SUPPORT_FLAG_<suffix>.
    CHECK_C_COMPILER_FLAG(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name})
    # From here on safe_name holds the *name* of the result variable, so
    # ${safe_name} below expands to that name and if() evaluates the variable.
    set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name})
    if(${safe_name})
        # Prepend, keeping every flag that was already collected.
        set(CUDA_NVCC_FLAGS
            --compiler-options;${flag_name}
            ${CUDA_NVCC_FLAGS})
    endif()
 endmacro()
 CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS)
 if(NOT UINT64_MAX_EXISTS)
  set(CMAKE_REQUIRED_DEFINITIONS -D__STDC_LIMIT_MACROS)
@ -63,20 +71,43 @@ set(COMMON_FLAGS
    -Wnon-virtual-dtor
    -Wdelete-non-virtual-dtor
    -Wno-unused-parameter
    -Wno-unused-function
    -Wno-error=literal-suffix
    -Wno-error=unused-local-typedefs)
 set(GPU_COMMON_FLAGS
    -fPIC
    -fno-omit-frame-pointer
    -Wnon-virtual-dtor
    -Wdelete-non-virtual-dtor
    -Wno-unused-parameter
    -Wno-unused-function
    -Wno-error=literal-suffix
    -Wno-error=unused-local-typedefs
    -Wno-error=unused-function  # Warnings in Numpy Header.
 )
 if (APPLE)
    # On Mac OS X build fat binaries with x86_64 architectures by default.
    set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
 else()
    set(GPU_COMMON_FLAGS
        -Wall
        -Wextra
        -Werror
        ${GPU_COMMON_FLAGS})
 endif()
 foreach(flag ${COMMON_FLAGS})
    safe_set_cflag(CMAKE_C_FLAGS ${flag})
    safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
 endforeach()
-# On Mac OS X build fat binaries with x86_64 architectures by default.
+foreach(flag ${GPU_COMMON_FLAGS})
-if (APPLE)
+    safe_set_nvflag(${flag})
-    set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
+endforeach()
-endif ()
+
 # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
 # So, don't set these flags here.
--- a/cmake/swig.cmake
+++ b/cmake/swig.cmake
@ -27,6 +27,7 @@ function(generate_python_api target_name)
        COMMAND swig -python -c++ -outcurrentdir -I../ api/Paddle.swig
                && mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
        DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig
                ${PROJ_ROOT}/paddle/api/PaddleAPI.h
        WORKING_DIRECTORY ${PROJ_ROOT}/paddle
        COMMENT "Generate Python API from swig")
    add_custom_target(${target_name} ALL DEPENDS
--- a/demo/introduction/README.md
+++ b/demo/introduction/README.md
@ -0,0 +1,4 @@
 This folder contains scripts used in PaddlePaddle introduction.
 - use `bash train.sh` to train a simple linear regression model
 - use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].
--- a/demo/introduction/dataprovider.py
+++ b/demo/introduction/dataprovider.py
@ -0,0 +1,24 @@
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer.PyDataProvider2 import *
 import random
 # Input types: two dense vectors of dimension 1 (the value x and its target y).
 @provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
 def process(settings, input_file):
    """Yield 2000 synthetic samples ([x], [y]) where y = 2*x + 0.3.
    Neither ``settings`` nor ``input_file`` is read; x is drawn with
    random.random() for every sample.
    """
    sample_count = 2000
    for _ in xrange(sample_count):
        value = random.random()
        target = 2*value+0.3
        yield [value], [target]
--- a/demo/introduction/evaluate_model.py
+++ b/demo/introduction/evaluate_model.py
@ -0,0 +1,36 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Print model parameters in last model
 Usage:
    python evaluate_model.py
 """
 import numpy as np
 import os
 def load(file_name):
    """Return the values stored in one saved parameter file.
    The first 16 bytes are read and discarded as the header; everything after
    them is decoded as float32 and returned as a numpy array.
    """
    header_size = 16  # skip header for float type.
    param_file = open(file_name, 'rb')
    try:
        param_file.read(header_size)
        values = np.fromfile(param_file, dtype=np.float32)
    finally:
        param_file.close()
    return values
 def main():
    """Print the parameters `w` and `b` saved under output/pass-00029."""
    # Extract the scalar explicitly: formatting a one-element array with
    # '%f' relies on an implicit array-to-scalar conversion that recent NumPy
    # releases deprecate. item() still fails if the file does not hold
    # exactly one value, like the implicit conversion did.
    w = load('output/pass-00029/w').item()
    b = load('output/pass-00029/b').item()
    # A parenthesised single-argument print behaves the same on Python 2 and 3.
    print('w=%.6f, b=%.6f from pass 29' % (w, b))
 if __name__ == '__main__':
    main()
--- a/demo/introduction/train.sh
+++ b/demo/introduction/train.sh
@ -0,0 +1,21 @@
 #!/bin/bash
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Abort on the first failing command. pipefail is required in addition to -e:
 # without it the pipeline below reports tee's exit status, so a failing
 # `paddle train` would still let the script exit successfully.
 set -e
 set -o pipefail
 # Train for 30 passes, saving each pass under ./output; stdout and stderr are
 # shown on the terminal and copied to train.log.
 paddle train \
    --config=trainer_config.py \
    --save_dir=./output \
    --num_passes=30 \
    2>&1 |tee 'train.log'
--- a/demo/introduction/trainer_config.py
+++ b/demo/introduction/trainer_config.py
@ -0,0 +1,32 @@
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 # 1. read data. Suppose you saved above python code as dataprovider.py
 # The provider named below ('process' in module 'dataprovider') is handed the
 # entries of this list; here only a placeholder file containing a single
 # space is written.
 data_file = 'empty.list'
 with open(data_file, 'w') as f: f.writelines(' ')
 define_py_data_sources2(train_list=data_file, test_list=None, 
        module='dataprovider', obj='process',args={})
 # 2. learning algorithm: momentum optimizer, 12 samples per batch.
 settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
 # 3. Network configuration
 # Two size-1 data layers: the input 'x' and the regression target 'y'.
 x = data_layer(name='x', size=1)
 y = data_layer(name='y', size=1)
 # One fully connected unit with linear activation; its weight parameter is
 # named 'w' and its bias parameter 'b'.
 y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
 # The regression cost between prediction and label is the network output.
 cost = regression_cost(input=y_predict, label=y)
 outputs(cost)
--- a/demo/quick_start/api_train.py
+++ b/demo/quick_start/api_train.py
@ -0,0 +1,114 @@
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import argparse
 import itertools
 import random
 from paddle.trainer.config_parser import parse_config
 from py_paddle import swig_paddle as api
 from py_paddle import DataProviderConverter
 from paddle.trainer.PyDataProvider2 \
    import integer_value, integer_value_sequence, sparse_binary_vector
 def parse_arguments():
    """Parse the command line of this script and return the argparse namespace.
    --train_data, --config and --dict_file are mandatory; --test_data is
    optional. --seq, --use_gpu, --trainer_count and --num_passes are integers
    with defaults 1, 0, 1 and 5.
    """
    parser = argparse.ArgumentParser()
    # main() always loads the training set, so a missing --train_data must be
    # rejected here with a usage error instead of failing later in open(None).
    parser.add_argument("--train_data",
                        type=str, required=True, help="train data file")
    parser.add_argument("--test_data", type=str, help="test data file")
    parser.add_argument("--config",
                        type=str, required=True, help="config file name")
    parser.add_argument("--dict_file", required=True, help="dictionary file")
    parser.add_argument("--seq",
                        default=1, type=int,
                        help="whether use sequence training")
    parser.add_argument("--use_gpu", default=0, type=int,
                        help="whether use GPU for training")
    parser.add_argument("--trainer_count", default=1, type=int,
                        help="Number of threads for training")
    parser.add_argument("--num_passes", default=5, type=int,
                        help="Number of training passes")
    return parser.parse_args()
 # Index used for every word that is not present in the dictionary.
 UNK_IDX = 0
 def load_data(file_name, word_dict):
    """Yield (word_ids, label) for every sample line of ``file_name``.
    Each sample line has the form ``<label>\\t<comment>``. The comment is split
    on whitespace and each word is mapped through ``word_dict``; unknown words
    map to UNK_IDX. The label is converted with int(). Blank lines are skipped.
    """
    with open(file_name, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (e.g. at the end of the file) carries no sample.
                continue
            # Split at the first tab only, so a tab inside the comment text
            # does not break the two-way unpacking.
            label, comment = line.split('\t', 1)
            words = comment.split()
            word_slot = [word_dict.get(w, UNK_IDX) for w in words]
            yield word_slot, int(label)
 def load_dict(dict_file):
    """Build the word -> index dictionary from ``dict_file``.
    The key is the first whitespace-separated token of each line and the value
    is the zero-based line number; a word that occurs again keeps the number of
    its last occurrence.
    """
    with open(dict_file, 'r') as dict_lines:
        return dict((entry.strip().split()[0], index)
                    for index, entry in enumerate(dict_lines))
 def main():
    """Train (and optionally test) the configured model through the swig API.
    Steps: parse the command line, initialise Paddle, load the dictionary and
    the datasets into memory, build the GradientMachine and Trainer from the
    parsed config, then run ``--num_passes`` passes over the shuffled training
    data, each followed by a test period when test data was given.
    """
    options = parse_arguments()
    api.initPaddle("--use_gpu=%s" % options.use_gpu,
                   "--trainer_count=%s" % options.trainer_count)
    word_dict = load_dict(options.dict_file)
    train_dataset = list(load_data(options.train_data, word_dict))
    if options.test_data:
        test_dataset = list(load_data(options.test_data, word_dict))
    else:
        test_dataset = None
    trainer_config = parse_config(options.config,
                                  "dict_file=%s" % options.dict_file)
    # No need to have data provider for trainer
    trainer_config.ClearField('data_config')
    trainer_config.ClearField('test_data_config')
    # create a GradientMachine from the model configuration
    model = api.GradientMachine.createFromConfigProto(
        trainer_config.model_config)
    # create a trainer for the gradient machine
    trainer = api.Trainer.create(trainer_config, model)
    # create a data converter which converts data to PaddlePaddle
    # internal format
    input_types = [
        integer_value_sequence(len(word_dict)) if options.seq
            else sparse_binary_vector(len(word_dict)),
        integer_value(2)]
    converter = DataProviderConverter(input_types)
    batch_size = trainer_config.opt_config.batch_size
    trainer.startTrain()
    for train_pass in xrange(options.num_passes):
        trainer.startTrainPass()
        random.shuffle(train_dataset)
        for pos in xrange(0, len(train_dataset), batch_size):
            # The datasets are lists, so slice them directly; the last batch
            # may be shorter than batch_size.
            batch = train_dataset[pos:pos + batch_size]
            trainer.trainOneDataBatch(len(batch), converter(batch))
        trainer.finishTrainPass()
        if test_dataset:
            trainer.startTestPeriod()
            for pos in xrange(0, len(test_dataset), batch_size):
                batch = test_dataset[pos:pos + batch_size]
                trainer.testOneDataBatch(len(batch), converter(batch))
            trainer.finishTestPeriod()
    trainer.finishTrain()
 if __name__ == '__main__':
    main()
--- a/demo/quick_start/api_train.sh
+++ b/demo/quick_start/api_train.sh
@ -0,0 +1,29 @@
 #!/bin/bash
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Abort on the first failing command. pipefail is required in addition to -e:
 # without it the pipeline below reports tee's exit status, so a failing
 # python run would still let the script exit successfully.
 set -e
 set -o pipefail
 # Note: if using trainer_config.emb.py, trainer_config.cnn.py
 # or trainer_config.lstm.py, you need to change --seq=0 to --seq=1
 # because they are sequence models.
 python api_train.py \
  --config=trainer_config.lr.py \
  --trainer_count=2 \
  --num_passes=15 \
  --use_gpu=0 \
  --seq=0 \
  --train_data=data/train.txt \
  --test_data=data/test.txt \
  --dict_file=data/dict.txt \
  2>&1 | tee 'train.log'
--- a/demo/quick_start/dataprovider_emb.py
+++ b/demo/quick_start/dataprovider_emb.py
@ -16,6 +16,7 @@ from paddle.trainer.PyDataProvider2 import *
 UNK_IDX = 0
 def initializer(settings, dictionary, **kwargs):
    settings.word_dict = dictionary
    settings.input_types = [
--- a/demo/quick_start/train.sh
+++ b/demo/quick_start/train.sh
@ -24,7 +24,7 @@ paddle train \
  --config=$cfg \
  --save_dir=./output \
  --trainer_count=4 \
-  --log_period=20 \
+  --log_period=100 \
  --num_passes=15 \
  --use_gpu=false \
  --show_parameter_stats_period=100 \
--- a/demo/quick_start/trainer_config.lr.py
+++ b/demo/quick_start/trainer_config.lr.py
@ -16,7 +16,7 @@
 from paddle.trainer_config_helpers import *
-dict_file = "./data/dict.txt"
+dict_file = get_config_arg('dict_file', str, "./data/dict.txt")
 word_dict = dict()
 with open(dict_file, 'r') as f:
    for i, line in enumerate(f):
@ -63,7 +63,6 @@ if not is_predict:
    label = data_layer(name="label", size=2)
    # Define cross-entropy classification loss and error.
    classification_cost(input=output, label=label)
    cls = classification_cost(input=output, label=label)
    outputs(cls)
 else:
--- a/demo/quick_start/trainer_config.lstm.py
+++ b/demo/quick_start/trainer_config.lstm.py
@ -42,20 +42,13 @@ settings(
    gradient_clipping_threshold=25
 )
 bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
 data = data_layer(name="word", size=len(word_dict))
 emb = embedding_layer(input=data, size=128)
-fc = fc_layer(input=emb, size=512,
+lstm = simple_lstm(input=emb, size=128,
-              act=LinearActivation(),
+                   lstm_cell_attr=ExtraAttr(drop_rate=0.25))
-              bias_attr=bias_attr,
+lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling())
-              layer_attr=ExtraAttr(drop_rate=0.1))
+output = fc_layer(input=lstm_max, size=2,
 lstm = lstmemory(input=fc, act=TanhActivation(),
                 bias_attr=bias_attr,
                 layer_attr=ExtraAttr(drop_rate=0.25))
 lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
 output = fc_layer(input=lstm_last, size=2,
                  bias_attr=bias_attr,
                  act=SoftmaxActivation())
 if is_predict:
    maxid = maxid_layer(output)
--- a/demo/sentiment/predict.py
+++ b/demo/sentiment/predict.py
@ -46,8 +46,8 @@ class SentimentPrediction():
        conf = parse_config(train_conf, "is_predict=1")
        self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
        self.network.loadParameters(self.model_dir)
-        slots = [integer_value_sequence(self.dict_dim)]
+        input_types = [integer_value_sequence(self.dict_dim)]
-        self.converter = DataProviderConverter(slots)
+        self.converter = DataProviderConverter(input_types)
    def load_dict(self):
        """
--- a/doc/build/build_from_source.md
+++ b/doc/build/build_from_source.md
@ -153,12 +153,12 @@ As a simple example, consider the following:
 - **Only CPU**
  ```bash
-  cmake  .. -DWITH_GPU=OFF -DWITH_DOC=OFF
+  cmake  .. -DWITH_GPU=OFF
  ```
 - **GPU**
  ```bash
-  cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF
+  cmake .. -DWITH_GPU=ON
  ```
 - **GPU with doc and swig**
@ -171,7 +171,7 @@ Finally, you can build PaddlePaddle:
 ```bash
 # you can add build option here, such as:    
-cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF -DCMAKE_INSTALL_PREFIX=<path to install>
+cmake .. -DWITH_GPU=ON -DCMAKE_INSTALL_PREFIX=<path to install>
 # please use sudo make install, if you want to install PaddlePaddle into the system
 make -j `nproc` && make install
 # set PaddlePaddle installation path in ~/.bashrc
@ -246,7 +246,7 @@ easy_install pip
        ```bash
        sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local
-        sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
+        sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib/libcudnn*
        ```
    2. Then you need to set DYLD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc.
@ -273,12 +273,12 @@ As a simple example, consider the following:
 - **Only CPU**
  ```bash
-  cmake  .. -DWITH_GPU=OFF -DWITH_DOC=OFF
+  cmake  .. -DWITH_GPU=OFF
  ```
 - **GPU**
  ```bash
-  cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF
+  cmake .. -DWITH_GPU=ON
  ```
 - **GPU with doc and swig**
@ -291,9 +291,9 @@ Finally, you can build PaddlePaddle:
 ```bash
 # you can add build option here, such as:    
-cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF -DCMAKE_INSTALL_PREFIX=<installation path>
+cmake .. -DWITH_GPU=ON -DCMAKE_INSTALL_PREFIX=<installation path>
 # please use sudo make install, if you want to install PaddlePaddle into the system
-make -j `nproc` && make install
+make -j `sysctl -n hw.ncpu` && make install
 # set PaddlePaddle installation path in ~/.bashrc
 export PATH=<installation path>/bin:$PATH
 ```
--- a/doc/build/contribute_to_paddle.md
+++ b/doc/build/contribute_to_paddle.md
@ -4,7 +4,7 @@ We sincerely appreciate your contributions. You can use fork and pull request
 workflow to merge your code. 
 ## Code Requirements
- Your code mush be fully documented by
+- Your code must be fully documented by
  [doxygen](http://www.stack.nl/~dimitri/doxygen/) style.
 - Make sure the compiler option WITH\_STYLE\_CHECK is on and the compiler
  passes the code style check.
@ -20,16 +20,30 @@ It's just that simple.
 ## Clone
 Paddle is currently using [git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/).
 The **develop** is the main branch, and other user's branches are feature branches.
 Once you've created a fork, you can use your favorite git client to clone your
 repo or just head straight to the command line:
 ```shell
 # Clone your fork to your local machine
-git clone https://github.com/USERNAME/Paddle.git
+git clone --branch develop https://github.com/USERNAME/Paddle.git
 ```
 If your repository doesn't contain a **develop** branch, just create it on your own.
 ```shell
 git clone https://github.com/USERNAME/Paddle.git Paddle
 cd Paddle
 git checkout -b develop  # create develop branch.
 git remote add upstream https://github.com/baidu/Paddle.git  # add upstream to baidu/Paddle
 git pull upstream develop  # update to upstream
 ```
 Then you can start to develop by making a local development branch
 ```shell
-git checkout -b MY_COOL_STUFF_BRANCH origin/master
+git checkout -b MY_COOL_STUFF_BRANCH
 ```
 ## Commit
@ -41,7 +55,7 @@ Commit your changes by following command lines:
 git status
 # add modified files
 git add xx
-git commit -m "commit info"
+env EDITOR=vim git commit  # You can write your comments by vim/nano/emacs.
 ```
 The first line of commit information is the title. The second and later lines
 are the details if any.
@ -63,7 +77,7 @@ git remote -v
 Update your fork with the latest upstream changes:
 ```shell
-git pull --rebase upstream HEAD
+git pull --rebase upstream develop
 ```
 If there are no unique commits locally, git will simply perform a fast-forward.
@ -76,7 +90,7 @@ Now, your local master branch is up-to-date with everything modified upstream.
 ```shell
 # push to your repository in Github
-git push origin HEAD
+git push -u origin MY_COOL_STUFF_BRANCH  # create remote branch MY_COOL_STUFF_BRANCH to origin.
 ```
 ## Pull Request
@ -93,13 +107,24 @@ of conflict, you need to do the update manually. You need to do the following on
 your local repository:
 ```shell
 git checkout MY_COOL_STUFF_BRANCH
-git pull --rebase upstream HEAD
+git pull upstream develop
 # You may need to resolve the conflict according to the git prompt.
 # Make and test your code.
-git push -f origin HEAD
+git push origin MY_COOL_STUFF_BRANCH
 ```
 Now your Pull Request is updated with the latest version.
 ## Revise your pull request
 When you revise your pull request according to reviewer's comments, please use 'git commit' instead of 'git commit --amend' to commit your changes so that the reviewers can see the difference between the new pull request and the old pull request.
 The possible commands are
 ```shell
 git checkout MY_COOL_STUFF_BRANCH
 git pull upstream develop   # update local to newest code base.
 # Some conflicts may occur.
 # And develop your cool stuff
 env EDITOR=vim git commit  # add your revise log
 git push origin MY_COOL_STUFF_BRANCH
 ```
--- a/doc/index.md
+++ b/doc/index.md
@ -3,6 +3,7 @@ PaddlePaddle Documentation
 User Guide
 ----------
 * [Introduction](introduction/index.md)
 * [Quick Start](demo/quick_start/index_en.md)
 * [Build and Installation](build/index.rst)
 * [Contribute Code](build/contribute_to_paddle.md)
--- a/doc/introduction/index.md
+++ b/doc/introduction/index.md
@ -0,0 +1,101 @@
 # Introduction
 PaddlePaddle is a deep learning platform open-sourced by Baidu. With PaddlePaddle, you can easily train a classic neural network within a couple lines of configuration, or you can build sophisticated models that provide state-of-the-art performance on difficult learning tasks like sentiment analysis, machine translation, image caption and so on.
 ## 1. A Classic Problem
 Now, to give you a hint of what using PaddlePaddle looks like, let's start with a fundamental learning problem - <a href="https://en.wikipedia.org/wiki/Simple_linear_regression">**simple linear regression**</a> : you have observed a set of two-dimensional data points of `X` and `Y`, where `X` is an explanatory variable and `Y` is corresponding dependent variable, and you want to recover the underlying correlation between `X` and `Y`. Linear regression can be used in many practical scenarios. For example, `X` can be a variable about house size, and `Y` a variable about house price. You can build a model that captures relationship between them by observing real estate markets.
 ## 2. Prepare the Data
 Suppose the true relationship can be characterized as `Y = 2X + 0.3`, let's see how to recover this pattern only from observed data. Here is a piece of python code that feeds synthetic data to PaddlePaddle. The code is pretty self-explanatory, the only extra thing you need to add for PaddlePaddle is a definition of input data types.
 ```python
 # dataprovider.py
 from paddle.trainer.PyDataProvider2 import *
 import random
 # define data types of input: 2 real numbers
@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
 def process(settings, input_file):
    for i in xrange(2000):
        x = random.random()
        yield [x], [2*x+0.3]
 ```
 ## 3. Train a NeuralNetwork in PaddlePaddle
 To recover this relationship between `X` and `Y`, we use a neural network with one layer of linear activation units and a square error cost layer. Don't worry if you are not familiar with these terminologies, it's just saying that we are starting from a random line `Y' = wX + b` , then we gradually adapt `w` and `b` to minimize the difference between `Y'` and `Y`. Here is what it looks like in PaddlePaddle:
 ```python
 # trainer_config.py
 from paddle.trainer_config_helpers import *
 # 1. read data. Suppose you saved above python code as dataprovider.py
 data_file = 'empty.list'
 with open(data_file, 'w') as f: f.writelines(' ')
 define_py_data_sources2(train_list=data_file, test_list=None, 
        module='dataprovider', obj='process',args={})
 # 2. learning algorithm
 settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
 # 3. Network configuration
 x = data_layer(name='x', size=1)
 y = data_layer(name='y', size=1)
 y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
 cost = regression_cost(input=y_predict, label=y)
 outputs(cost)
 ```
 Some of the most fundamental usages of PaddlePaddle are demonstrated:
 -  The first part shows how to feed data into PaddlePaddle. In general, PaddlePaddle reads raw data from a list of files and then applies user-defined processing to produce the actual input. In this case, we only need to create a placeholder file since we are generating synthetic data on the fly.
 -  The second part describes learning algorithm. It defines in what ways adjustments are made to model parameters. PaddlePaddle provides a rich set of optimizers, but a simple momentum based optimizer will suffice here, and it processes 12 data points each time.
 -  Finally, the network configuration. It usually is as simple as "stacking" layers. Three kinds of layers are used in this configuration:
 	-  **Data Layer**: a network always starts with one or more data layers. They provide input data to the rest of the network. In this problem, two data layers are used respectively for `X` and `Y`.
 	-  **FC Layer**: FC layer is short for Fully Connected Layer, which connects all the input units to current layer and does the actual computation specified as activation function. Computation layers like this are the fundamental building blocks of a deeper model.
 	-  **Cost Layer**: in the training phase, cost layers are usually the last layers of the network. They measure the performance of the current model, and provide guidance for adjusting parameters.
 Now that everything is ready, you can train the network with a simple command line call:
 ```
 paddle train --config=trainer_config.py --save_dir=./output --num_passes=30
 ```
 This means that PaddlePaddle will train this network on the synthetic dataset for 30 passes, and save all the models under path `./output`. You will see from the messages printed out during training phase that the model cost is decreasing as time goes by, which indicates we are getting a closer guess.
 ## 4. Evaluate the Model
 Usually, a separate dataset that was left out during the training phase should be used to evaluate the models. However, we are lucky enough to know the real answer: `w=2, b=0.3`, thus a better option is to check out model parameters directly.
 In PaddlePaddle, training is just to get a collection of model parameters, which are `w` and `b` in this case. Each parameter is saved in an individual file in the popular `numpy` array format. Here is the code that reads parameters from last pass.
 ```python
 import numpy as np
 import os
 def load(file_name):
    with open(file_name, 'rb') as f:
        f.read(16) # skip header for float type.
        return np.fromfile(f, dtype=np.float32)
 print 'w=%.6f, b=%.6f' % (load('output/pass-00029/w'), load('output/pass-00029/b'))
 # w=1.999743, b=0.300137
 ```
 <center> ![](./parameters.png) </center>
 Although training starts from a random guess, you can see that the value of `w` changes quickly towards 2 and `b` changes quickly towards 0.3. In the end, the predicted line is almost identical to the real answer.
 There, you have recovered the underlying pattern between `X` and `Y` only from observed data.
 ## 5. Where to Go from Here
 - <a href="../build/index.html"> Build and Installation </a>
 - <a href="../demo/quick_start/index_en.html">Quick Start</a>
 - <a href="../demo/index.html">Example and Demo</a>
--- a/doc/introduction/parameters.png
+++ b/doc/introduction/parameters.png
@ -0,0 +1 @@
 ../../doc_cn/introduction/parameters.png
--- a/doc/ui/cmd_argument/argument_outline.md
+++ b/doc/ui/cmd_argument/argument_outline.md
@ -183,7 +183,7 @@ It looks like there are a lot of arguments. However, most of them are for develo
 </tr>
 <tr>
-<td class="left" rowspan = "5">GPU</td><td class="left">gpu_id</td>
+<td class="left" rowspan = "6">GPU</td><td class="left">gpu_id</td>
 <td class="left">√</td><td class="left">√</td><td class="left">√</td><td class="left">√</td>
 </tr>
@ -207,6 +207,11 @@ It looks like there are a lot of arguments. However, most of them are for develo
 <td class="left">√</td><td class="left">√</td><td class="left">√</td><td class="left">√</td>
 </tr>
 <tr>
 <td class="left">cudnn_conv_workspace_limit_in_mb</td>
 <td class="left">√</td><td class="left">√</td><td class="left">√</td><td class="left">√</td>
 </tr>
 <tr>
 <td class="left" rowspan = "4">RNN</td>
 <td class="left">beam_size</td>
--- a/doc/ui/cmd_argument/detail_introduction.md
+++ b/doc/ui/cmd_argument/detail_introduction.md
@ -163,6 +163,10 @@
  - Choose path to dynamic load NVIDIA CUDA library, for instance, /usr/local/cuda/lib64. [Default]: LD_LIBRARY_PATH
  - type: string (default: "", null)
 * `--cudnn_conv_workspace_limit_in_mb`
  - Specify cuDNN max workspace limit, in units MB, 4096MB=4GB by default. 
  - type: int32 (default: 4096MB=4GB)
 ## NLP: RNN/LSTM/GRU
 * `--rnn_use_batch`
  - Whether to use batch method for calculation in simple RecurrentLayer.
--- a/doc_cn/concepts/nn.rst
+++ b/doc_cn/concepts/nn.rst
@ -0,0 +1,3 @@
 TBD
 目前正在书写中。敬请期待。
--- a/doc_cn/concepts/program_concepts.rst
+++ b/doc_cn/concepts/program_concepts.rst
@ -0,0 +1,4 @@
 TBD
 ###
 目前正在书写中。敬请期待。
--- a/doc_cn/concepts/pserver_topology.dot
+++ b/doc_cn/concepts/pserver_topology.dot
@ -0,0 +1,68 @@
 // Undirected graph with four machine clusters, each holding one parameter
 // server and one trainer. Every trainer is connected to every parameter
 // server, and a single data node is connected to every trainer.
 graph pp_topology {
 	// Rank direction for the layout: bottom to top.
 	rankdir=BT;
 	// Cluster for machine 0 (the label string below is Chinese for "machine 0").
 	subgraph cluster_node0 {
 		style=filled;
 		color=lightgrey;
 		node [style=filled, color=white, shape=box];
 		label = "机器0"
 		pserver0 [label="Parameter \n Server 0"]
 		trainer0 [label="Trainer 0"]
 	}
 	// Cluster for machine 1.
 	subgraph cluster_node1 {
 		style=filled;
 		color=lightgrey;
 		node [style=filled, color=white, shape=box];
 		label = "机器1"
 		pserver1 [label="Parameter \n Server 1"]
 		trainer1 [label="Trainer 1"]
 	}
 	// Cluster for machine 2.
 	subgraph cluster_node2 {
 		style=filled;
 		color=lightgrey;
 		node [style=filled, color=white, shape=box];
 		label = "机器2"
 		pserver2 [label="Parameter \n Server 2"]
 		trainer2 [label="Trainer 2"]
 	}
 	// Cluster for machine 3.
 	subgraph cluster_node3 {
 		style=filled;
 		color=lightgrey;
 		node [style=filled, color=white, shape=box];
 		label = "机器3"
 		pserver3 [label="Parameter \n Server 3"]
 		trainer3 [label="Trainer 3"]
 	}
 	// Data source node (the label string is Chinese for "data").
 	data [label="数据", shape=hexagon]
 	// Trainer 0 is connected to all four parameter servers.
 	trainer0 -- pserver0
 	trainer0 -- pserver1
 	trainer0 -- pserver2
 	trainer0 -- pserver3
 	// Trainer 1 is connected to all four parameter servers.
 	trainer1 -- pserver0
 	trainer1 -- pserver1
 	trainer1 -- pserver2
 	trainer1 -- pserver3
 	// Trainer 2 is connected to all four parameter servers.
 	trainer2 -- pserver0
 	trainer2 -- pserver1
 	trainer2 -- pserver2
 	trainer2 -- pserver3
 	// Trainer 3 is connected to all four parameter servers.
 	trainer3 -- pserver0
 	trainer3 -- pserver1
 	trainer3 -- pserver2
 	trainer3 -- pserver3
 	// The data node is connected to every trainer.
 	data -- trainer0
 	data -- trainer1
 	data -- trainer2
 	data -- trainer3
 }
--- a/Show More
+++ b/Show More
		`@ -0,0 +1,3 @@`
							`TBD`

							`目前正在书写中。敬请期待。`