Merge pull request #1 from PaddlePaddle/develop

update from origin
avx_docs
CrossLee1 (committed via GitHub)
commit 4a0cb3db6c

.gitignore

@@ -9,3 +9,6 @@ build/
.pydevproject
Makefile
.test_env/
*~
bazel-*

@@ -7,6 +7,7 @@
sha: v0.13.2
hooks:
- id: yapf
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ # Bazel BUILD files follow Python syntax.
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
hooks:

@@ -8,10 +8,13 @@ os:
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
- JOB=PRE_COMMIT
matrix:
exclude:
- os: osx
env: JOB=DOCS # Only generate documentation in linux
env: JOB=DOCS # Only generate documentation in linux.
- os: osx
env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
addons:
apt:
@@ -26,10 +29,6 @@ addons:
- python-pip
- python2.7-dev
- m4
- libprotobuf-dev
- doxygen
- protobuf-compiler
- python-protobuf
- python-numpy
- python-wheel
- libgoogle-glog-dev
@@ -39,18 +38,25 @@ addons:
- lcov
- graphviz
- swig
- clang-format-3.8
- automake
- libtool
before_install:
- |
if [ ${JOB} == "BUILD_AND_TEST" ]; then
if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
local change_list=`git diff --name-only $TRAVIS_COMMIT_RANGE`
if [ $? -eq 0 ]; then # if git diff returned non-zero, skip this check and run the unit tests anyway.
if ! echo ${change_list} | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
then
echo "Only markdown docs were updated, stopping build process."
exit
fi
fi
fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- pip install wheel protobuf sphinx breathe recommonmark virtualenv numpy sphinx_rtd_theme
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
- pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit
script:
- paddle/scripts/travis/main.sh
notifications:

@@ -25,8 +25,8 @@ find_package(ZLIB REQUIRED)
find_package(NumPy REQUIRED)
find_package(Threads REQUIRED)
find_package(AVX QUIET)
find_package(Glog)
find_package(Gflags QUIET)
find_package(Glog REQUIRED)
find_package(Gflags REQUIRED)
find_package(GTest)
find_package(Sphinx)
find_package(Doxygen)
@@ -40,8 +40,6 @@ option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ${AVX_FOUND})
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_STYLE_CHECK "Style Check for PaddlePaddle" ${PYTHONINTERP_FOUND})
option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF)
option(WITH_GLOG "Compile PaddlePaddle use glog, otherwise use a log implement internally" ${LIBGLOG_FOUND})
option(WITH_GFLAGS "Compile PaddlePaddle use gflags, otherwise use a flag implement internally" ${GFLAGS_FOUND})
option(WITH_TIMER "Compile PaddlePaddle use timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle use gpu profiler" OFF)
option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
@@ -136,16 +134,12 @@ else(WITH_RDMA)
add_definitions(-DPADDLE_DISABLE_RDMA)
endif(WITH_RDMA)
if(WITH_GLOG)
add_definitions(-DPADDLE_USE_GLOG)
include_directories(${LIBGLOG_INCLUDE_DIR})
endif()
# glog
include_directories(${LIBGLOG_INCLUDE_DIR})
if(WITH_GFLAGS)
add_definitions(-DPADDLE_USE_GFLAGS)
add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
include_directories(${GFLAGS_INCLUDE_DIRS})
endif()
#gflags
add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
include_directories(${GFLAGS_INCLUDE_DIRS})
if(WITH_TESTING)
enable_testing()
@@ -169,5 +163,4 @@ add_subdirectory(paddle)
add_subdirectory(python)
if(WITH_DOC)
add_subdirectory(doc)
add_subdirectory(doc_cn)
endif()

@@ -0,0 +1 @@
./doc/howto/dev/contribute_to_paddle_en.md

@@ -0,0 +1,31 @@
# External dependency to Google protobuf.
http_archive(
name="protobuf",
url="http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
sha256="0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
strip_prefix="protobuf-3.1.0")
# External dependency to gtest 1.7.0. This method comes from
# https://www.bazel.io/versions/master/docs/tutorial/cpp.html.
new_http_archive(
name="gtest",
url="https://github.com/google/googletest/archive/release-1.7.0.zip",
sha256="b58cb7547a28b2c718d1e38aee18a3659c9e3ff52440297e965f5edffe34b6d0",
build_file="third_party/gtest.BUILD",
strip_prefix="googletest-release-1.7.0")
# External dependency to gflags. This method comes from
# https://github.com/gflags/example/blob/master/WORKSPACE.
new_git_repository(
name="gflags",
tag="v2.2.0",
remote="https://github.com/gflags/gflags.git",
build_file="third_party/gflags.BUILD")
# External dependency to glog. This method comes from
# https://github.com/reyoung/bazel_playground/blob/master/WORKSPACE
new_git_repository(
name="glog",
remote="https://github.com/google/glog.git",
commit="b6a5e0524c28178985f0d228e9eaa43808dbec3c",
build_file="third_party/glog.BUILD")

@@ -25,4 +25,3 @@ test 4 2 256 512
test 4 2 512 128
test 4 2 512 256
test 4 2 512 512

@@ -72,6 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
${source}
${destination}
COMMENT "Generating sphinx documentation: ${builder}"
COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html
)
set_property(

@@ -14,13 +14,9 @@ if(WITH_STYLE_CHECK)
find_package(PythonInterp REQUIRED)
endif()
if(WITH_GLOG)
find_package(Glog REQUIRED)
endif()
find_package(Glog REQUIRED)
if(WITH_GFLAGS)
find_package(Gflags REQUIRED)
endif()
find_package(Gflags REQUIRED)
if(WITH_TESTING)
find_package(GTest REQUIRED)
@@ -28,9 +24,7 @@ endif()
if(WITH_DOC)
find_package(Sphinx REQUIRED)
find_package(Doxygen REQUIRED)
find_python_module(recommonmark REQUIRED)
find_python_module(breathe REQUIRED)
endif()
if(WITH_SWIG_PY)

@@ -65,7 +65,7 @@ endmacro()
# link_paddle_exe
# add paddle library for a paddle executable, such as trainer, pserver.
#
# It will handle WITH_PYTHON/WITH_GLOG etc.
# It will handle WITH_PYTHON etc.
function(link_paddle_exe TARGET_NAME)
if(WITH_RDMA)
generate_rdma_links()
@@ -108,6 +108,8 @@ function(link_paddle_exe TARGET_NAME)
paddle_cuda
${METRIC_LIBS}
${PROTOBUF_LIBRARY}
${LIBGLOG_LIBRARY}
${GFLAGS_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT}
${CBLAS_LIBS}
${ZLIB_LIBRARIES}
@@ -125,16 +127,6 @@ function(link_paddle_exe TARGET_NAME)
${PYTHON_LIBRARIES})
endif()
if(WITH_GLOG)
target_link_libraries(${TARGET_NAME}
${LIBGLOG_LIBRARY})
endif()
if(WITH_GFLAGS)
target_link_libraries(${TARGET_NAME}
${GFLAGS_LIBRARIES})
endif()
if(WITH_GPU)
if(NOT WITH_DSO OR WITH_METRIC)
target_link_libraries(${TARGET_NAME}
@@ -206,5 +198,5 @@ function(create_resources res_file output)
# Convert hex data for C compatibility
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
# Append data to output file
file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}0};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
endfunction()
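For illustration with a hypothetical resource (name and contents are made up, not from this commit): a three-byte file msg containing the bytes 0x68 0x69 0x0a would now be emitted roughly as

const unsigned char msg[] = {0x68,0x69,0x0a,0};
const unsigned msg_size = sizeof(msg);

The appended 0 presumably lets the embedded data double as a NUL-terminated string; note that msg_size then counts the terminator as well.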

@@ -1,3 +1,4 @@
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,4 +16,3 @@ set -e
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar zxf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz

@@ -15,5 +15,3 @@ do
gunzip ${fname}.gz
fi
done

@@ -14,10 +14,9 @@
from paddle.trainer_config_helpers import *
mode = get_config_arg("mode", str, "generator")
assert mode in set(["generator",
"discriminator",
"generator_training",
"discriminator_training"])
assert mode in set([
"generator", "discriminator", "generator_training", "discriminator_training"
])
is_generator_training = mode == "generator_training"
is_discriminator_training = mode == "discriminator_training"
@@ -38,8 +37,8 @@ sample_dim = 2
settings(
batch_size=128,
learning_rate=1e-4,
learning_method=AdamOptimizer(beta1=0.5)
)
learning_method=AdamOptimizer(beta1=0.5))
def discriminator(sample):
"""
@@ -50,71 +49,88 @@ def discriminator(sample):
of the sample is from real data.
"""
param_attr = ParamAttr(is_static=is_generator_training)
bias_attr = ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
initial_std=0)
bias_attr = ParamAttr(
is_static=is_generator_training, initial_mean=1.0, initial_std=0)
hidden = fc_layer(input=sample, name="dis_hidden", size=hidden_dim,
hidden = fc_layer(
input=sample,
name="dis_hidden",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(input=hidden, name="dis_hidden2", size=hidden_dim,
hidden2 = fc_layer(
input=hidden,
name="dis_hidden2",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(hidden2,
hidden_bn = batch_norm_layer(
hidden2,
act=ReluActivation(),
name="dis_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
param_attr=ParamAttr(
is_static=is_generator_training, initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(input=hidden_bn, name="dis_prob", size=2,
return fc_layer(
input=hidden_bn,
name="dis_prob",
size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
def generator(noise):
"""
generator generates a sample given noise
"""
param_attr = ParamAttr(is_static=is_discriminator_training)
bias_attr = ParamAttr(is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0)
bias_attr = ParamAttr(
is_static=is_discriminator_training, initial_mean=1.0, initial_std=0)
hidden = fc_layer(input=noise,
hidden = fc_layer(
input=noise,
name="gen_layer_hidden",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(input=hidden, name="gen_hidden2", size=hidden_dim,
hidden2 = fc_layer(
input=hidden,
name="gen_hidden2",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(hidden2,
hidden_bn = batch_norm_layer(
hidden2,
act=ReluActivation(),
name="gen_layer_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(is_static=is_discriminator_training,
param_attr=ParamAttr(
is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(input=hidden_bn,
return fc_layer(
input=hidden_bn,
name="gen_layer1",
size=sample_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
if is_generator_training:
noise = data_layer(name="noise", size=noise_dim)
sample = generator(noise)
@@ -126,7 +142,8 @@ if is_generator_training or is_discriminator_training:
label = data_layer(name="label", size=1)
prob = discriminator(sample)
cost = cross_entropy(input=prob, label=label)
classification_error_evaluator(input=prob, label=label, name=mode+'_error')
classification_error_evaluator(
input=prob, label=label, name=mode + '_error')
outputs(cost)
if is_generator:

(File diff suppressed because it is too large.)

(File diff suppressed because it is too large.)

@@ -1,3 +1,4 @@
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");

@@ -21,7 +21,7 @@ from paddle.trainer.PyDataProvider2 import *
#
# {'img_size': 32,
# 'settings': <paddle.trainer.PyDataProviderWrapper.Cls instance at 0x7fea27cb6050>,
# 'settings': a global object,
# 'color': True,
# 'mean_img_size': 32,
# 'meta': './data/cifar-out/batches/batches.meta',
@@ -50,10 +50,10 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
settings.logger.info('Image size: %s', settings.img_size)
settings.logger.info('Meta path: %s', settings.meta_path)
settings.input_types = [
dense_vector(settings.img_raw_size), # image feature
integer_value(settings.num_classes)
] # labels
settings.input_types = {
'image': dense_vector(settings.img_raw_size),
'label': integer_value(settings.num_classes)
}
settings.logger.info('DataProvider Initialization finished')
@@ -83,4 +83,7 @@ def processData(settings, file_list):
img, settings.img_mean, settings.img_size,
settings.is_train, settings.color)
label = data['labels'][i]
yield img_feat.astype('float32'), int(label)
yield {
'image': img_feat.astype('float32'),
'label': int(label)
}
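With input_types and the yielded samples keyed by name, the dict keys are meant to line up with the data layer names in the network configuration. A hypothetical sketch of the matching layers (sizes assumed for 3-channel 32x32 images and 10 classes; not part of this diff):

from paddle.trainer_config_helpers import *

# Hypothetical config snippet: layer names mirror the provider's dict keys.
img = data_layer(name='image', size=32 * 32 * 3)  # assumed 3-channel 32x32 input
lbl = data_layer(name='label', size=10)           # assumed 10 classes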

@@ -0,0 +1,5 @@
dataprovider.pyc
empty.list
train.log
output
train.list

@@ -17,8 +17,10 @@ import random
# define data types of input: 2 real numbers
@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
@provider(
input_types={'x': dense_vector(1),
'y': dense_vector(1)}, use_seq=False)
def process(settings, input_file):
for i in xrange(2000):
x = random.random()
yield [x], [2 * x + 0.3]
yield {'x': [x], 'y': [2 * x + 0.3]}

@@ -15,11 +15,8 @@
from paddle.trainer_config_helpers import *
# 1. read data. Suppose you saved above python code as dataprovider.py
data_file = 'empty.list'
with open(data_file, 'w') as f:
f.writelines(' ')
define_py_data_sources2(
train_list=data_file,
train_list=['no_matter.txt'],
test_list=None,
module='dataprovider',
obj='process',

@@ -1,10 +1,12 @@
from paddle.trainer.PyDataProvider2 import *
import numpy
# Define a py data provider
@provider(
input_types={'pixel': dense_vector(28 * 28),
'label': integer_value(10)})
'label': integer_value(10)},
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename): # settings is not used currently.
imgf = filename + "-images-idx3-ubyte"
labelf = filename + "-labels-idx1-ubyte"
@@ -20,12 +22,13 @@ def process(settings, filename):  # settings is not used currently.
else:
n = 10000
for i in range(n):
label = ord(l.read(1))
pixels = []
for j in range(28 * 28):
pixels.append(float(ord(f.read(1))) / 255.0)
yield {"pixel": pixels, 'label': label}
images = numpy.fromfile(
f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0
labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
for i in xrange(n):
yield {"pixel": images[i, :], 'label': labels[i]}
f.close()
l.close()

@@ -8,6 +8,8 @@ data/test.list
data/test.txt
data/train.list
data/train.txt
data/pred.list
data/pred.txt
dataprovider_copy_1.py
train.log
output

@@ -0,0 +1,147 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os, sys
import numpy as np
from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import sparse_binary_vector
from paddle.trainer.config_parser import parse_config
"""
Usage: run the following command to show the help message.
python api_predict.py -h
"""
class QuickStartPrediction():
def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
"""
train_conf: trainer config file.
dict_file: word dictionary file name.
model_dir: directory of the model.
"""
self.train_conf = train_conf
self.dict_file = dict_file
self.word_dict = {}
self.dict_dim = self.load_dict()
self.model_dir = model_dir
if model_dir is None:
self.model_dir = os.path.dirname(train_conf)
self.label = None
if label_file is not None:
self.load_label(label_file)
conf = parse_config(train_conf, "is_predict=1")
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(self.model_dir)
input_types = [sparse_binary_vector(self.dict_dim)]
self.converter = DataProviderConverter(input_types)
def load_dict(self):
"""
Load dictionary from self.dict_file.
"""
for line_count, line in enumerate(open(self.dict_file, 'r')):
self.word_dict[line.strip().split('\t')[0]] = line_count
return len(self.word_dict)
def load_label(self, label_file):
"""
Load label.
"""
self.label = {}
for v in open(label_file, 'r'):
self.label[int(v.split('\t')[1])] = v.split('\t')[0]
def get_index(self, data):
"""
transform word into integer index according to the dictionary.
"""
words = data.strip().split()
word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
return word_slot
def batch_predict(self, data_batch):
input = self.converter(data_batch)
output = self.network.forwardTest(input)
prob = output[0]["id"].tolist()
print("predicting labels is:")
print prob
def option_parser():
usage = "cat input_text | python api_predict.py -n config -w model_dir -d dictionary -c batch_size"
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-n",
"--tconf",
action="store",
dest="train_conf",
help="network config")
parser.add_option(
"-d",
"--dict",
action="store",
dest="dict_file",
help="dictionary file")
parser.add_option(
"-b",
"--label",
action="store",
dest="label",
default=None,
help="dictionary file")
parser.add_option(
"-c",
"--batch_size",
type="int",
action="store",
dest="batch_size",
default=1,
help="the batch size for prediction")
parser.add_option(
"-w",
"--model",
action="store",
dest="model_path",
default=None,
help="model path")
return parser.parse_args()
def main():
options, args = option_parser()
train_conf = options.train_conf
batch_size = options.batch_size
dict_file = options.dict_file
model_path = options.model_path
label = options.label
swig_paddle.initPaddle("--use_gpu=0")
predict = QuickStartPrediction(train_conf, dict_file, model_path, label)
batch = []
labels = []
for line in sys.stdin:
[label, text] = line.split("\t")
labels.append(int(label))
batch.append([predict.get_index(text)])
print("labels is:")
print labels
predict.batch_predict(batch)
if __name__ == '__main__':
main()

@@ -0,0 +1,30 @@
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
# Note: the default model is pass-00002; you should make sure the model path
# exists or change the model path.
# Only tested on trainer_config.lr.py.
model=output/pass-00001/
config=trainer_config.lr.py
label=data/labels.list
dict=data/dict.txt
batch_size=20
head -n$batch_size data/test.txt | python api_predict.py \
--tconf=$config\
--model=$model \
--label=$label \
--dict=$dict \
--batch_size=$batch_size

@@ -31,16 +31,16 @@ def initializer(settings, dictionary, **kwargs):
# settings.input_types specifies what data types the data provider
# generates.
settings.input_types = [
settings.input_types = {
# The first input is a sparse_binary_vector,
# which means each dimension of the vector is either 0 or 1. It is the
# bag-of-words (BOW) representation of the texts.
sparse_binary_vector(len(dictionary)),
'word': sparse_binary_vector(len(dictionary)),
# The second input is an integer. It represents the category id of the
# sample. 2 means there are two labels in the dataset.
# (1 for positive and 0 for negative)
integer_value(2)
]
'label': integer_value(2)
}
# Declaring a data provider. It has an initializer 'data_initialzer'.
@@ -67,12 +67,12 @@ def process(settings, file_name):
# Return the features for the current comment. The first is a list
# of ids representing a 0-1 binary sparse vector of the text,
# the second is the integer id of the label.
yield word_vector, int(label)
yield {'word': word_vector, 'label': int(label)}
def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [sparse_binary_vector(len(dictionary))]
settings.input_types = {'word': sparse_binary_vector(len(dictionary))}
# Declaring a data provider for prediction. The difference with process
@@ -83,4 +83,4 @@ def process_predict(settings, file_name):
for line in f:
comment = line.strip().split()
word_vector = [settings.word_dict.get(w, UNK_IDX) for w in comment]
yield word_vector
yield {'word': word_vector}
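As with the image provider above, the dict keys yielded here ('word', 'label') correspond to data layer names in the trainer configuration. A hypothetical minimal config for this bag-of-words classifier (dictionary size and layer stack are illustrative only, not taken from this commit):

from paddle.trainer_config_helpers import *

dict_dim = 1024  # hypothetical; in practice len(word_dict)
data = data_layer(name='word', size=dict_dim)   # matches the 'word' slot
label = data_layer(name='label', size=2)        # matches the 'label' slot (2 classes)
output = fc_layer(input=data, size=2, act=SoftmaxActivation())
outputs(classification_cost(input=output, label=label))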

(Some files were not shown because too many files have changed in this diff.)
