Merge pull request #2 from PaddlePaddle/develop

Merge from PaddlePaddle/develop
8 years ago · 515543ab2f
parent 2a9d71a5fb bd689cbbe5
commit 515543ab2f
130 changed files with 7117 additions and 811 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -0,0 +1,3 @@
 [submodule "book"]
 	path = book
 	url = https://github.com/PaddlePaddle/book.git
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -2,12 +2,12 @@
    sha: c25201a00e6b0514370501050cf2a8538ac12270
    hooks:
    -   id: remove-crlf
-        files: (?!.*third_party)^.*$
+        files: (?!.*third_party)^.*$ | (?!.*book)^.*$
 -   repo: https://github.com/reyoung/mirrors-yapf.git
    sha: v0.13.2
    hooks:
    - id: yapf
-      files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$  # Bazel BUILD files follow Python syntax.
+      files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
 -   repo: https://github.com/pre-commit/pre-commit-hooks
    sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
    hooks:
@ -15,7 +15,7 @@
    -   id: check-merge-conflict
    -   id: check-symlinks
    -   id: detect-private-key
-        files: (?!.*third_party)^.*$
+        files: (?!.*third_party)^.*$ | (?!.*book)^.*$
    -   id: end-of-file-fixer
 -   repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
    sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
--- a/.travis.yml
+++ b/.travis.yml
@ -4,22 +4,14 @@ cache:
    - $HOME/third_party
    - $HOME/.ccache
    - $HOME/.cache/pip
    - $HOME/Library/Caches/Homebrew
 sudo: required
 dist: trusty
 os:
  - linux
  - osx
 env:
  - JOB=DOCS
  - JOB=BUILD_AND_TEST
  - JOB=PRE_COMMIT
 matrix:
  exclude:
    - os: osx
      env: JOB=DOCS  # Only generate documentation in linux.
    - os: osx
      env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
 addons:
  apt:
@ -53,11 +45,10 @@ before_install:
        fi
      fi
    fi
  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
  - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
  # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python 
  # protobuf version.
-  - pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker
+  - pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
 script:
  - paddle/scripts/travis/main.sh
 notifications:
--- a/3
+++ b/3
@ -29,13 +29,16 @@ Luo, Tao
 Lyu, Qin
 Mao, Hongyue
 Qian, Xiaojun
 Qiao, Longfei
 Qi, Jun
 Qin, Duohao
 Shen, Guolong
 Shi, Guangchuan
 Song, Xiang
 Wang, Helin
 Wang, Jiang
 Wang, Yanfei
 Wang, Yi
 Wang, Yong
 Weng, Renliang
 Xu, Tianbing
--- a/1
+++ b/1
@ -0,0 +1 @@
 Subproject commit 22ed2a01aee872f055b5f5f212428f481cefc10d
--- a/cmake/FindSphinx.cmake
+++ b/cmake/FindSphinx.cmake
@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
    ${source}
    ${destination}
    COMMENT "Generating sphinx documentation: ${builder}"
-    COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html
+    COMMAND cd ${destination} && ln -s ./index_*.html index.html
    )
  set_property(
--- a/cmake/coverallsGcovJsons.cmake
+++ b/cmake/coverallsGcovJsons.cmake
@ -110,14 +110,13 @@ endmacro()
 # Get the coverage data.
 file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda")
-message("GCDA files:")
+message("Process GCDA files:")
 message("===============================")
 # Get a list of all the object directories needed by gcov
 # (The directories the .gcda files and .o files are found in)
 # and run gcov on those.
 foreach(GCDA ${GCDA_FILES})
 	message("Process: ${GCDA}")
 	message("------------------------------------------------------------------------------")
 	get_filename_component(GCDA_DIR ${GCDA} PATH)
 	#
@ -135,7 +134,7 @@ foreach(GCDA ${GCDA_FILES})
 	# If -p is not specified then the file is named only "the_file.c.gcov"
 	#
 	execute_process(
-		COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA}
+		COMMAND "${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null"
 		WORKING_DIRECTORY ${GCDA_DIR}
 	)
 endforeach()
@ -383,7 +382,6 @@ foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING})
 	set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
 	# Generate the final JSON for this file.
 	message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...")
 	string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
 	set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
 endforeach()
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@ -14,46 +14,50 @@
 INCLUDE(ExternalProject)
-SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
+FIND_PACKAGE(Protobuf 3.1)
 SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)
 SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE)
-INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
+IF(NOT PROTOBUF_FOUND)
    SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
    SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)
    SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE)
    IF(WIN32)
        SET(PROTOBUF_LITE_LIBRARY
            "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE)
        SET(PROTOBUF_LIBRARY
            "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE)
        SET(PROTOBUF_PROTOC_LIBRARY
            "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
        SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
    ELSE(WIN32)
        SET(PROTOBUF_LITE_LIBRARY
            "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
        SET(PROTOBUF_LIBRARY
            "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
        SET(PROTOBUF_PROTOC_LIBRARY
            "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
        SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
    ENDIF(WIN32)
-IF(WIN32)
+    ExternalProject_Add(
-  SET(PROTOBUF_LITE_LIBRARY
+        protobuf
-        "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE)
+        ${EXTERNAL_PROJECT_LOG_ARGS}
-  SET(PROTOBUF_LIBRARY
+        PREFIX          ${PROTOBUF_SOURCES_DIR}
-        "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE)
+        UPDATE_COMMAND  ""
-  SET(PROTOBUF_PROTOC_LIBRARY
+        DEPENDS         zlib
-        "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
+        GIT_REPOSITORY  "https://github.com/google/protobuf.git"
-  SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
+        GIT_TAG         "9f75c5aa851cd877fb0d93ccc31b8567a6706546"
-ELSE(WIN32)
+        CONFIGURE_COMMAND
-  SET(PROTOBUF_LITE_LIBRARY
+        ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-        "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
+        -Dprotobuf_BUILD_TESTS=OFF
-  SET(PROTOBUF_LIBRARY
+        -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-        "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
+        -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-  SET(PROTOBUF_PROTOC_LIBRARY
+        -DCMAKE_BUILD_TYPE=Release
-        "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
+        -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-  SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
+        -DCMAKE_INSTALL_LIBDIR=lib
-ENDIF(WIN32)
+    )
-ExternalProject_Add(
+    LIST(APPEND external_project_dependencies protobuf)
-  protobuf
+ENDIF(NOT PROTOBUF_FOUND)
  ${EXTERNAL_PROJECT_LOG_ARGS}
  PREFIX          ${PROTOBUF_SOURCES_DIR}
  UPDATE_COMMAND  ""
  DEPENDS         zlib
  GIT_REPOSITORY  "https://github.com/google/protobuf.git"
  GIT_TAG         "9f75c5aa851cd877fb0d93ccc31b8567a6706546"
  CONFIGURE_COMMAND
    ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
    -Dprotobuf_BUILD_TESTS=OFF
    -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
    -DCMAKE_BUILD_TYPE=Release
    -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
    -DCMAKE_INSTALL_LIBDIR=lib
 )
-LIST(APPEND external_project_dependencies protobuf)
+INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
--- a/cmake/external/python.cmake
+++ b/cmake/external/python.cmake
@ -221,7 +221,3 @@ ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
 INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
 INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
 MESSAGE("[Paddle] Python Executable: ${PYTHON_EXECUTABLE}")
 MESSAGE("[Paddle] Python Include: ${PYTHON_INCLUDE_DIRS}")
 MESSAGE("[Paddle] Python Libraries: ${PYTHON_LIBRARIES}")
--- a/demo/image_classification/api_v2_resnet.py
+++ b/demo/image_classification/api_v2_resnet.py
@ -0,0 +1,74 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import paddle.v2 as paddle
 __all__ = ['resnet_cifar10']
 def conv_bn_layer(input,
                  ch_out,
                  filter_size,
                  stride,
                  padding,
                  active_type=paddle.activation.Relu(),
                  ch_in=None):
    tmp = paddle.layer.img_conv(
        input=input,
        filter_size=filter_size,
        num_channels=ch_in,
        num_filters=ch_out,
        stride=stride,
        padding=padding,
        act=paddle.activation.Linear(),
        bias_attr=False)
    return paddle.layer.batch_norm(input=tmp, act=active_type)
 def shortcut(ipt, n_in, n_out, stride):
    if n_in != n_out:
        return conv_bn_layer(ipt, n_out, 1, stride, 0,
                             paddle.activation.Linear())
    else:
        return ipt
 def basicblock(ipt, ch_out, stride):
    ch_in = ch_out * 2
    tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
    tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
    short = shortcut(ipt, ch_in, ch_out, stride)
    return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())
 def layer_warp(block_func, ipt, features, count, stride):
    tmp = block_func(ipt, features, stride)
    for i in range(1, count):
        tmp = block_func(tmp, features, 1)
    return tmp
 def resnet_cifar10(ipt, depth=32):
    # depth should be one of 20, 32, 44, 56, 110, 1202
    assert (depth - 2) % 6 == 0
    n = (depth - 2) / 6
    nStages = {16, 64, 128}
    conv1 = conv_bn_layer(
        ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
    res1 = layer_warp(basicblock, conv1, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 64, n, 2)
    pool = paddle.layer.img_pool(
        input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
    return pool
--- a/demo/image_classification/api_v2_train.py
+++ b/demo/image_classification/api_v2_train.py
@ -0,0 +1,92 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License
 import sys
 import paddle.v2 as paddle
 from api_v2_vgg import vgg_bn_drop
 def main():
    datadim = 3 * 32 * 32
    classdim = 10
    # PaddlePaddle init
    paddle.init(use_gpu=False, trainer_count=1)
    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(datadim))
    # Add neural network config
    # option 1. resnet
    # net = resnet_cifar10(image, depth=32)
    # option 2. vgg
    net = vgg_bn_drop(image)
    out = paddle.layer.fc(input=net,
                          size=classdim,
                          act=paddle.activation.Softmax())
    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(classdim))
    cost = paddle.layer.classification_cost(input=out, label=lbl)
    # Create parameters
    parameters = paddle.parameters.create(cost)
    # Create optimizer
    momentum_optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
        learning_rate=0.1 / 128.0,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=50000 * 100,
        learning_rate_schedule='discexp',
        batch_size=128)
    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                reader=paddle.batch(
                    paddle.dataset.cifar.test10(), batch_size=128),
                feeding={'image': 0,
                         'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
    # Create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=momentum_optimizer)
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.cifar.train10(), buf_size=50000),
            batch_size=128),
        num_passes=5,
        event_handler=event_handler,
        feeding={'image': 0,
                 'label': 1})
 if __name__ == '__main__':
    main()
--- a/demo/image_classification/api_v2_vgg.py
+++ b/demo/image_classification/api_v2_vgg.py
@ -0,0 +1,47 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import paddle.v2 as paddle
 __all__ = ['vgg_bn_drop']
 def vgg_bn_drop(input):
    def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
        return paddle.networks.img_conv_group(
            input=ipt,
            num_channels=num_channels,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act=paddle.activation.Relu(),
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type=paddle.pooling.Max())
    conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
    drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
    fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
    bn = paddle.layer.batch_norm(
        input=fc1,
        act=paddle.activation.Relu(),
        layer_attr=paddle.attr.Extra(drop_rate=0.5))
    fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
    return fc2
--- a/demo/introduction/api_train_v2.py
+++ b/demo/introduction/api_train_v2.py
@ -0,0 +1,58 @@
 import paddle.v2 as paddle
 import paddle.v2.dataset.uci_housing as uci_housing
 def main():
    # init
    paddle.init(use_gpu=False, trainer_count=1)
    # network config
    x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
    y_predict = paddle.layer.fc(input=x,
                                param_attr=paddle.attr.Param(name='w'),
                                size=1,
                                act=paddle.activation.Linear(),
                                bias_attr=paddle.attr.Param(name='b'))
    y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
    cost = paddle.layer.regression_cost(input=y_predict, label=y)
    # create parameters
    parameters = paddle.parameters.create(cost)
    # create optimizer
    optimizer = paddle.optimizer.Momentum(momentum=0)
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer)
    # event_handler to print training and testing info
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f" % (
                    event.pass_id, event.batch_id, event.cost)
        if isinstance(event, paddle.event.EndPass):
            if (event.pass_id + 1) % 10 == 0:
                result = trainer.test(
                    reader=paddle.batch(
                        uci_housing.test(), batch_size=2),
                    feeding={'x': 0,
                             'y': 1})
                print "Test %d, %.2f" % (event.pass_id, result.cost)
    # training
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                uci_housing.train(), buf_size=500),
            batch_size=2),
        feeding={'x': 0,
                 'y': 1},
        event_handler=event_handler,
        num_passes=30)
 if __name__ == '__main__':
    main()
--- a/demo/mnist/.gitignore
+++ b/demo/mnist/.gitignore
@ -5,3 +5,6 @@ plot.png
 train.log
 *pyc
 .ipynb_checkpoints
 params.pkl
 params.tar
 params.tar.gz
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@ -1,4 +1,58 @@
 import paddle.v2 as paddle
 import gzip
 def softmax_regression(img):
    predict = paddle.layer.fc(input=img,
                              size=10,
                              act=paddle.activation.Softmax())
    return predict
 def multilayer_perceptron(img):
    # The first fully-connected layer
    hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
    # The second fully-connected layer and the according activation function
    hidden2 = paddle.layer.fc(input=hidden1,
                              size=64,
                              act=paddle.activation.Relu())
    # The thrid fully-connected layer, note that the hidden size should be 10,
    # which is the number of unique digits
    predict = paddle.layer.fc(input=hidden2,
                              size=10,
                              act=paddle.activation.Softmax())
    return predict
 def convolutional_neural_network(img):
    # first conv layer
    conv_pool_1 = paddle.networks.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
        num_channel=1,
        pool_size=2,
        pool_stride=2,
        act=paddle.activation.Tanh())
    # second conv layer
    conv_pool_2 = paddle.networks.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        num_channel=20,
        pool_size=2,
        pool_stride=2,
        act=paddle.activation.Tanh())
    # The first fully-connected layer
    fc1 = paddle.layer.fc(input=conv_pool_2,
                          size=128,
                          act=paddle.activation.Tanh())
    # The softmax layer, note that the hidden size should be 10,
    # which is the number of unique digits
    predict = paddle.layer.fc(input=fc1,
                              size=10,
                              act=paddle.activation.Softmax())
    return predict
 def main():
@ -9,52 +63,73 @@ def main():
        name='pixel', type=paddle.data_type.dense_vector(784))
    label = paddle.layer.data(
        name='label', type=paddle.data_type.integer_value(10))
    hidden1 = paddle.layer.fc(input=images, size=200)
    hidden2 = paddle.layer.fc(input=hidden1, size=200)
    inference = paddle.layer.fc(input=hidden2,
                                size=10,
                                act=paddle.activation.Softmax())
    cost = paddle.layer.classification_cost(input=inference, label=label)
-    parameters = paddle.parameters.create(cost)
+    # Here we can build the prediction network in different ways. Please
    # choose one by uncomment corresponding line.
    predict = softmax_regression(images)
    #predict = multilayer_perceptron(images)
    #predict = convolutional_neural_network(images)
-    adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01)
+    cost = paddle.layer.classification_cost(input=predict, label=label)
    try:
        with gzip.open('params.tar.gz', 'r') as f:
            parameters = paddle.parameters.Parameters.from_tar(f)
    except IOError:
        parameters = paddle.parameters.create(cost)
    optimizer = paddle.optimizer.Momentum(
        learning_rate=0.1 / 128.0,
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
-                                 update_equation=adam_optimizer)
+                                 update_equation=optimizer)
    lists = []
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 1000 == 0:
-                result = trainer.test(reader=paddle.reader.batched(
+                print "Pass %d, Batch %d, Cost %f, %s" % (
-                    paddle.dataset.mnist.test(), batch_size=256))
+                    event.pass_id, event.batch_id, event.cost, event.metrics)
-                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
+                with gzip.open('params.tar.gz', 'w') as f:
-                    event.pass_id, event.batch_id, event.cost, event.metrics,
+                    parameters.to_tar(f)
                    result.metrics)
-        else:
+        elif isinstance(event, paddle.event.EndPass):
-            pass
+            result = trainer.test(reader=paddle.batch(
                paddle.dataset.mnist.test(), batch_size=128))
            print "Test with Pass %d, Cost %f, %s\n" % (
                event.pass_id, result.cost, result.metrics)
            lists.append((event.pass_id, result.cost,
                          result.metrics['classification_error_evaluator']))
    trainer.train(
-        reader=paddle.reader.batched(
+        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
-            batch_size=32),
+            batch_size=128),
-        event_handler=event_handler)
+        event_handler=event_handler,
        num_passes=100)
    # find the best pass
    best = sorted(lists, key=lambda list: float(list[1]))[0]
    print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
    print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
    test_creator = paddle.dataset.mnist.test()
    test_data = []
    for item in test_creator():
        test_data.append((item[0], ))
        if len(test_data) == 100:
            break
    # output is a softmax layer. It returns probabilities.
    # Shape should be (100, 10)
    probs = paddle.infer(
-        output=inference,
+        output_layer=predict, parameters=parameters, input=test_data)
        parameters=parameters,
        reader=paddle.reader.batched(
            paddle.reader.firstn(
                paddle.reader.map_readers(lambda item: (item[0], ),
                                          paddle.dataset.mnist.test()),
                n=100),
            batch_size=32))
    print probs.shape
--- a/demo/recommendation/api_train_v2.py
+++ b/demo/recommendation/api_train_v2.py
@ -0,0 +1,125 @@
 import paddle.v2 as paddle
 import cPickle
 import copy
 def main():
    paddle.init(use_gpu=False)
    movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
    uid = paddle.layer.data(
        name='user_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_user_id() + 1))
    usr_emb = paddle.layer.embedding(input=uid, size=32)
    usr_gender_id = paddle.layer.data(
        name='gender_id', type=paddle.data_type.integer_value(2))
    usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)
    usr_age_id = paddle.layer.data(
        name='age_id',
        type=paddle.data_type.integer_value(
            len(paddle.dataset.movielens.age_table)))
    usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)
    usr_job_id = paddle.layer.data(
        name='job_id',
        type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
        ) + 1))
    usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)
    usr_combined_features = paddle.layer.fc(
        input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
        size=200,
        act=paddle.activation.Tanh())
    mov_id = paddle.layer.data(
        name='movie_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_movie_id() + 1))
    mov_emb = paddle.layer.embedding(input=mov_id, size=32)
    mov_categories = paddle.layer.data(
        name='category_id',
        type=paddle.data_type.sparse_binary_vector(
            len(paddle.dataset.movielens.movie_categories())))
    mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)
    mov_title_id = paddle.layer.data(
        name='movie_title',
        type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
    mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
    mov_title_conv = paddle.networks.sequence_conv_pool(
        input=mov_title_emb, hidden_size=32, context_len=3)
    mov_combined_features = paddle.layer.fc(
        input=[mov_emb, mov_categories_hidden, mov_title_conv],
        size=200,
        act=paddle.activation.Tanh())
    inference = paddle.layer.cos_sim(
        a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
    cost = paddle.layer.regression_cost(
        input=inference,
        label=paddle.layer.data(
            name='score', type=paddle.data_type.dense_vector(1)))
    parameters = paddle.parameters.create(cost)
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=paddle.optimizer.Adam(
                                     learning_rate=1e-4))
    feeding = {
        'user_id': 0,
        'gender_id': 1,
        'age_id': 2,
        'job_id': 3,
        'movie_id': 4,
        'category_id': 5,
        'movie_title': 6,
        'score': 7
    }
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d Batch %d Cost %.2f" % (
                    event.pass_id, event.batch_id, event.cost)
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.movielens.train(), buf_size=8192),
            batch_size=256),
        event_handler=event_handler,
        feeding=feeding,
        num_passes=1)
    user_id = 234
    movie_id = 345
    user = paddle.dataset.movielens.user_info()[user_id]
    movie = paddle.dataset.movielens.movie_info()[movie_id]
    feature = user.value() + movie.value()
    def reader():
        yield feature
    infer_dict = copy.copy(feeding)
    del infer_dict['score']
    prediction = paddle.infer(
        output=inference,
        parameters=parameters,
        reader=paddle.batch(
            reader, batch_size=32),
        feeding=infer_dict)
    print(prediction + 5) / 2
 if __name__ == '__main__':
    main()
--- a/demo/semantic_role_labeling/api_train_v2.py
+++ b/demo/semantic_role_labeling/api_train_v2.py
@ -0,0 +1,190 @@
 import sys
 import math
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.dataset.conll05 as conll05
 def db_lstm():
    word_dict, verb_dict, label_dict = conll05.get_dict()
    word_dict_len = len(word_dict)
    label_dict_len = len(label_dict)
    pred_len = len(verb_dict)
    mark_dict_len = 2
    word_dim = 32
    mark_dim = 5
    hidden_dim = 512
    depth = 8
    #8 features
    def d_type(size):
        return paddle.data_type.integer_value_sequence(size)
    word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
    predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
    ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
    ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
    ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
    ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
    ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
    target = paddle.layer.data(name='target', type=d_type(label_dict_len))
    default_std = 1 / math.sqrt(hidden_dim) / 3.0
    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)
    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(
            name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        size=mark_dim, input=mark, param_attr=std_0)
    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        paddle.layer.embedding(
            size=word_dim, input=x, param_attr=emb_para) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)
    hidden_0 = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=std_default) for emb in emb_layers
        ])
    mix_hidden_lr = 1e-3
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)
    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=std_0,
        param_attr=lstm_para_attr)
    #stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]
    for i in range(1, depth):
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=std_default,
            input=[
                paddle.layer.full_matrix_projection(
                    input=input_tmp[0], param_attr=hidden_para_attr),
                paddle.layer.full_matrix_projection(
                    input=input_tmp[1], param_attr=lstm_para_attr)
            ])
        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            reverse=((i % 2) == 1),
            bias_attr=std_0,
            param_attr=lstm_para_attr)
        input_tmp = [mix_hidden, lstm]
    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=input_tmp[0], param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=input_tmp[1], param_attr=lstm_para_attr)
        ], )
    crf_cost = paddle.layer.crf(size=label_dict_len,
                                input=feature_out,
                                label=target,
                                param_attr=paddle.attr.Param(
                                    name='crfw',
                                    initial_std=default_std,
                                    learning_rate=mix_hidden_lr))
    crf_dec = paddle.layer.crf_decoding(
        name='crf_dec_l',
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))
    return crf_cost, crf_dec
 def load_parameter(file_name, h, w):
    with open(file_name, 'rb') as f:
        f.read(16)  # skip header.
        return np.fromfile(f, dtype=np.float32).reshape(h, w)
 def main():
    paddle.init(use_gpu=False, trainer_count=1)
    # define network topology
    crf_cost, crf_dec = db_lstm()
    # create parameters
    parameters = paddle.parameters.create([crf_cost, crf_dec])
    # create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0,
        learning_rate=2e-2,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(
            average_window=0.5, max_average_window=10000), )
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
    trainer = paddle.trainer.SGD(cost=crf_cost,
                                 parameters=parameters,
                                 update_equation=optimizer)
    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
    trn_reader = paddle.batch(
        paddle.reader.shuffle(
            conll05.test(), buf_size=8192), batch_size=10)
    feeding = {
        'word_data': 0,
        'ctx_n2_data': 1,
        'ctx_n1_data': 2,
        'ctx_0_data': 3,
        'ctx_p1_data': 4,
        'ctx_p2_data': 5,
        'verb_data': 6,
        'mark_data': 7,
        'target': 8
    }
    trainer.train(
        reader=trn_reader,
        event_handler=event_handler,
        num_passes=10000,
        feeding=feeding)
 if __name__ == '__main__':
    main()
--- a/demo/sentiment/train_v2.py
+++ b/demo/sentiment/train_v2.py
@ -0,0 +1,159 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import sys
 import paddle.v2 as paddle
 def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
    data = paddle.layer.data("word",
                             paddle.data_type.integer_value_sequence(input_dim))
    emb = paddle.layer.embedding(input=data, size=emb_dim)
    conv_3 = paddle.networks.sequence_conv_pool(
        input=emb, context_len=3, hidden_size=hid_dim)
    conv_4 = paddle.networks.sequence_conv_pool(
        input=emb, context_len=4, hidden_size=hid_dim)
    output = paddle.layer.fc(input=[conv_3, conv_4],
                             size=class_dim,
                             act=paddle.activation.Softmax())
    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
    cost = paddle.layer.classification_cost(input=output, label=lbl)
    return cost
 def stacked_lstm_net(input_dim,
                     class_dim=2,
                     emb_dim=128,
                     hid_dim=512,
                     stacked_num=3):
    """
    A Wrapper for sentiment classification task.
    This network uses bi-directional recurrent network,
    consisting three LSTM layers. This configure is referred to
    the paper as following url, but use fewer layrs.
        http://www.aclweb.org/anthology/P15-1109
    input_dim: here is word dictionary dimension.
    class_dim: number of categories.
    emb_dim: dimension of word embedding.
    hid_dim: dimension of hidden layer.
    stacked_num: number of stacked lstm-hidden layer.
    """
    assert stacked_num % 2 == 1
    layer_attr = paddle.attr.Extra(drop_rate=0.5)
    fc_para_attr = paddle.attr.Param(learning_rate=1e-3)
    lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.)
    para_attr = [fc_para_attr, lstm_para_attr]
    bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.)
    relu = paddle.activation.Relu()
    linear = paddle.activation.Linear()
    data = paddle.layer.data("word",
                             paddle.data_type.integer_value_sequence(input_dim))
    emb = paddle.layer.embedding(input=data, size=emb_dim)
    fc1 = paddle.layer.fc(input=emb,
                          size=hid_dim,
                          act=linear,
                          bias_attr=bias_attr)
    lstm1 = paddle.layer.lstmemory(
        input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
    inputs = [fc1, lstm1]
    for i in range(2, stacked_num + 1):
        fc = paddle.layer.fc(input=inputs,
                             size=hid_dim,
                             act=linear,
                             param_attr=para_attr,
                             bias_attr=bias_attr)
        lstm = paddle.layer.lstmemory(
            input=fc,
            reverse=(i % 2) == 0,
            act=relu,
            bias_attr=bias_attr,
            layer_attr=layer_attr)
        inputs = [fc, lstm]
    fc_last = paddle.layer.pooling(
        input=inputs[0], pooling_type=paddle.pooling.Max())
    lstm_last = paddle.layer.pooling(
        input=inputs[1], pooling_type=paddle.pooling.Max())
    output = paddle.layer.fc(input=[fc_last, lstm_last],
                             size=class_dim,
                             act=paddle.activation.Softmax(),
                             bias_attr=bias_attr,
                             param_attr=para_attr)
    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
    cost = paddle.layer.classification_cost(input=output, label=lbl)
    return cost
 if __name__ == '__main__':
    # init
    paddle.init(use_gpu=False)
    #data
    print 'load dictionary...'
    word_dict = paddle.dataset.imdb.word_dict()
    dict_dim = len(word_dict)
    class_dim = 2
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=100)
    test_reader = paddle.batch(
        lambda: paddle.dataset.imdb.test(word_dict), batch_size=100)
    feeding = {'word': 0, 'label': 1}
    # network config
    # Please choose the way to build the network
    # by uncommenting the corresponding line.
    cost = convolution_net(dict_dim, class_dim=class_dim)
    # cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
    # create parameters
    parameters = paddle.parameters.create(cost)
    # create optimizer
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=2e-3,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(average_window=0.5))
    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(reader=test_reader, feeding=feeding)
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
    # create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=adam_optimizer)
    trainer.train(
        reader=train_reader,
        event_handler=event_handler,
        feeding=feeding,
        num_passes=2)
--- a/demo/seqToseq/api_train_v2.py
+++ b/demo/seqToseq/api_train_v2.py
@ -0,0 +1,146 @@
 import sys
 import paddle.v2 as paddle
 def seqToseq_net(source_dict_dim, target_dict_dim):
    ### Network Architecture
    word_vector_dim = 512  # dimension of word vector
    decoder_size = 512  # dimension of hidden unit in GRU Decoder network
    encoder_size = 512  # dimension of hidden unit in GRU Encoder network
    #### Encoder
    src_word_id = paddle.layer.data(
        name='source_language_word',
        type=paddle.data_type.integer_value_sequence(source_dict_dim))
    src_embedding = paddle.layer.embedding(
        input=src_word_id,
        size=word_vector_dim,
        param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
    src_forward = paddle.networks.simple_gru(
        input=src_embedding, size=encoder_size)
    src_backward = paddle.networks.simple_gru(
        input=src_embedding, size=encoder_size, reverse=True)
    encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
    #### Decoder
    with paddle.layer.mixed(size=decoder_size) as encoded_proj:
        encoded_proj += paddle.layer.full_matrix_projection(
            input=encoded_vector)
    backward_first = paddle.layer.first_seq(input=src_backward)
    with paddle.layer.mixed(
            size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
        decoder_boot += paddle.layer.full_matrix_projection(
            input=backward_first)
    def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
        decoder_mem = paddle.layer.memory(
            name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
        context = paddle.networks.simple_attention(
            encoded_sequence=enc_vec,
            encoded_proj=enc_proj,
            decoder_state=decoder_mem)
        with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
            decoder_inputs += paddle.layer.full_matrix_projection(input=context)
            decoder_inputs += paddle.layer.full_matrix_projection(
                input=current_word)
        gru_step = paddle.layer.gru_step(
            name='gru_decoder',
            input=decoder_inputs,
            output_mem=decoder_mem,
            size=decoder_size)
        with paddle.layer.mixed(
                size=target_dict_dim,
                bias_attr=True,
                act=paddle.activation.Softmax()) as out:
            out += paddle.layer.full_matrix_projection(input=gru_step)
        return out
    decoder_group_name = "decoder_group"
    group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
    group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
    group_inputs = [group_input1, group_input2]
    trg_embedding = paddle.layer.embedding(
        input=paddle.layer.data(
            name='target_language_word',
            type=paddle.data_type.integer_value_sequence(target_dict_dim)),
        size=word_vector_dim,
        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
    group_inputs.append(trg_embedding)
    # For decoder equipped with attention mechanism, in training,
    # target embeding (the groudtruth) is the data input,
    # while encoded source sequence is accessed to as an unbounded memory.
    # Here, the StaticInput defines a read-only memory
    # for the recurrent_group.
    decoder = paddle.layer.recurrent_group(
        name=decoder_group_name,
        step=gru_decoder_with_attention,
        input=group_inputs)
    lbl = paddle.layer.data(
        name='target_language_next_word',
        type=paddle.data_type.integer_value_sequence(target_dict_dim))
    cost = paddle.layer.classification_cost(input=decoder, label=lbl)
    return cost
 def main():
    paddle.init(use_gpu=False, trainer_count=1)
    # source and target dict dim.
    dict_size = 30000
    source_dict_dim = target_dict_dim = dict_size
    # define network topology
    cost = seqToseq_net(source_dict_dim, target_dict_dim)
    parameters = paddle.parameters.create(cost)
    # define optimize method and trainer
    optimizer = paddle.optimizer.Adam(
        learning_rate=5e-5,
        regularization=paddle.optimizer.L2Regularization(rate=1e-3))
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer)
    # define data reader
    feeding = {
        'source_language_word': 0,
        'target_language_word': 1,
        'target_language_next_word': 2
    }
    wmt14_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192),
        batch_size=5)
    # define event_handler callback
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 10 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
    # start to train
    trainer.train(
        reader=wmt14_reader,
        event_handler=event_handler,
        num_passes=10000,
        feeding=feeding)
 if __name__ == '__main__':
    main()
--- a/demo/word2vec/train_v2.py
+++ b/demo/word2vec/train_v2.py
@ -0,0 +1,80 @@
 import math
 import paddle.v2 as paddle
 dictsize = 1953
 embsize = 32
 hiddensize = 256
 N = 5
 def wordemb(inlayer):
    wordemb = paddle.layer.table_projection(
        input=inlayer,
        size=embsize,
        param_attr=paddle.attr.Param(
            name="_proj",
            initial_std=0.001,
            learning_rate=1,
            l2_rate=0, ))
    return wordemb
 def main():
    paddle.init(use_gpu=False, trainer_count=1)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))
    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)
    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                result = trainer.test(
                    paddle.batch(
                        paddle.dataset.imikolov.test(word_dict, N), 32))
                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics)
    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
    trainer.train(
        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
        num_passes=30,
        event_handler=event_handler)
 if __name__ == '__main__':
    main()
--- a/doc/api/index_cn.rst
+++ b/doc/api/index_cn.rst
@ -1,37 +1,26 @@
-API中文手册
+API
-============
+===
-DataProvider API
+模型配置 API
----------------
+------------
 ..  toctree::
    :maxdepth: 1
-    data_provider/dataprovider_cn.rst
+    v2/model_configs.rst
    data_provider/pydataprovider2_cn.rst
-..  _api_trainer_config:
+数据 API
-
+--------
 Model Config API
 ----------------
 ..  toctree::
    :maxdepth: 1
-    trainer_config_helpers/optimizers.rst
+    v2/data.rst
    trainer_config_helpers/data_sources.rst
    trainer_config_helpers/layers.rst
    trainer_config_helpers/activations.rst 
    trainer_config_helpers/poolings.rst
    trainer_config_helpers/networks.rst
    trainer_config_helpers/evaluators.rst
    trainer_config_helpers/attrs.rst
-Applications API
+训练 API
----------------
+--------
-..  toctree::
+..	toctree::
-    :maxdepth: 1
+	:maxdepth: 1
-    predict/swig_py_paddle_cn.rst
+	v2/run_logic.rst
--- a/doc/api/index_en.rst
+++ b/doc/api/index_en.rst
@ -1,37 +1,26 @@
 API
 ===
-DataProvider API
+Model Config API
 ----------------
 ..  toctree::
    :maxdepth: 1
-    data_provider/dataprovider_en.rst
+    v2/model_configs.rst
    data_provider/pydataprovider2_en.rst
 ..  _api_trainer_config:
-Model Config API
+Data API
----------------
+--------
 ..  toctree::
    :maxdepth: 1
-    trainer_config_helpers/optimizers.rst
+    v2/data.rst
    trainer_config_helpers/data_sources.rst
    trainer_config_helpers/layers.rst
    trainer_config_helpers/activations.rst 
    trainer_config_helpers/poolings.rst
    trainer_config_helpers/networks.rst
    trainer_config_helpers/evaluators.rst
    trainer_config_helpers/attrs.rst
 Train API
 ---------
-Applications API
+..	toctree::
----------------
+	:maxdepth: 1
 ..  toctree::
    :maxdepth: 1
-    predict/swig_py_paddle_en.rst
+	v2/run_logic.rst
--- a/doc/api/v1/data_provider/dataprovider_cn.rst
+++ b/doc/api/v1/data_provider/dataprovider_cn.rst
--- a/doc/api/v1/data_provider/dataprovider_en.rst
+++ b/doc/api/v1/data_provider/dataprovider_en.rst
--- a/doc/api/v1/data_provider/pydataprovider2_cn.rst
+++ b/doc/api/v1/data_provider/pydataprovider2_cn.rst
--- a/Show More
+++ b/Show More
		`@ -0,0 +1 @@`
							`Subproject commit 22ed2a01aee872f055b5f5f212428f481cefc10d`