commit
b6ca314c44
@ -0,0 +1,190 @@
|
|||||||
|
import sys
|
||||||
|
import math
|
||||||
|
import numpy as np
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
import paddle.v2.dataset.conll05 as conll05
|
||||||
|
|
||||||
|
|
||||||
|
def db_lstm():
    """Build the deep bidirectional LSTM network for semantic role labeling.

    Eight input features (word, predicate, five context words and a region
    mark) are embedded, mixed into a hidden layer and passed through
    ``depth`` stacked LSTM layers of alternating direction; a CRF layer on
    top yields the training cost and a CRF decoding layer the predicted
    label sequence.

    :return: (crf_cost, crf_dec) layer outputs.
    """
    # Vocabulary sizes from the CoNLL-05 data set.
    word_dict, verb_dict, label_dict = conll05.get_dict()
    word_dict_len = len(word_dict)
    label_dict_len = len(label_dict)
    pred_len = len(verb_dict)

    # Topology hyper-parameters.
    mark_dict_len = 2  # the region mark feature is binary
    word_dim = 32
    mark_dim = 5
    hidden_dim = 512
    depth = 8

    #8 features
    def d_type(size):
        # Every input feature is a sequence of integer ids.
        return paddle.data_type.integer_value_sequence(size)

    word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
    predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))

    # Context words around the predicate: n2/n1 before, p1/p2 after.
    ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
    ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
    ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
    ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
    ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))

    # Gold label sequence used by the CRF layers.
    target = paddle.layer.data(name='target', type=d_type(label_dict_len))

    default_std = 1 / math.sqrt(hidden_dim) / 3.0

    # 'emb' is overwritten with a pre-trained table in main() and frozen
    # here (initial_std=0, learning_rate=0).
    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)

    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(
            name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        size=mark_dim, input=mark, param_attr=std_0)

    # The six word-like inputs share a single embedding table ('emb').
    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        paddle.layer.embedding(
            size=word_dim, input=x, param_attr=emb_para) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    # Mix all eight embeddings into the first hidden layer.
    hidden_0 = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=std_default) for emb in emb_layers
        ])

    # Hidden-to-hidden projections train with a much smaller learning rate.
    mix_hidden_lr = 1e-3
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)

    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=std_0,
        param_attr=lstm_para_attr)

    #stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        # Each level mixes the previous hidden layer and LSTM output.
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=std_default,
            input=[
                paddle.layer.full_matrix_projection(
                    input=input_tmp[0], param_attr=hidden_para_attr),
                paddle.layer.full_matrix_projection(
                    input=input_tmp[1], param_attr=lstm_para_attr)
            ])

        # Odd levels run the LSTM in reverse, alternating L- and R-LSTMs.
        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            reverse=((i % 2) == 1),
            bias_attr=std_0,
            param_attr=lstm_para_attr)

        input_tmp = [mix_hidden, lstm]

    # Per-label emission scores fed into the CRF.
    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=input_tmp[0], param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=input_tmp[1], param_attr=lstm_para_attr)
        ], )

    crf_cost = paddle.layer.crf(size=label_dict_len,
                                input=feature_out,
                                label=target,
                                param_attr=paddle.attr.Param(
                                    name='crfw',
                                    initial_std=default_std,
                                    learning_rate=mix_hidden_lr))

    # Decoding shares the transition weights ('crfw') with the cost layer.
    crf_dec = paddle.layer.crf_decoding(
        name='crf_dec_l',
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))

    return crf_cost, crf_dec
|
||||||
|
|
||||||
|
|
||||||
|
def load_parameter(file_name, h, w, header_bytes=16):
    """Load a dense float32 parameter matrix from a binary parameter file.

    :param file_name: path to the binary parameter file.
    :param h: number of rows of the stored matrix.
    :param w: number of columns of the stored matrix.
    :param header_bytes: size of the leading header to skip before the raw
        float32 values (the files read here carry a 16-byte header, which
        stays the default for backward compatibility).
    :return: numpy.ndarray of shape (h, w) and dtype float32.
    """
    with open(file_name, 'rb') as f:
        f.read(header_bytes)  # skip header.
        return np.fromfile(f, dtype=np.float32).reshape(h, w)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Train the db_lstm SRL network with a CRF cost on CoNLL-05 data."""
    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology
    crf_cost, crf_dec = db_lstm()

    # create parameters
    parameters = paddle.parameters.create([crf_cost, crf_dec])

    # create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0,
        learning_rate=2e-2,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(
            average_window=0.5, max_average_window=10000), )

    def event_handler(event):
        # Report cost/metrics every 100 batches.
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)

    trainer = paddle.trainer.SGD(cost=crf_cost,
                                 parameters=parameters,
                                 update_equation=optimizer)
    # Overwrite the frozen 'emb' table with pre-trained embeddings.
    # NOTE(review): shape (44068, 32) is hard-coded — presumably the conll05
    # embedding vocabulary size and word_dim; confirm against the data set.
    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

    # NOTE(review): this trains on conll05.test() — verify the test split is
    # intended here rather than a train split.
    trn_reader = paddle.reader.batched(
        paddle.reader.shuffle(
            conll05.test(), buf_size=8192), batch_size=10)

    # Map data-layer names to field positions within each reader sample.
    reader_dict = {
        'word_data': 0,
        'ctx_n2_data': 1,
        'ctx_n1_data': 2,
        'ctx_0_data': 3,
        'ctx_p1_data': 4,
        'ctx_p2_data': 5,
        'verb_data': 6,
        'mark_data': 7,
        'target': 8
    }

    trainer.train(
        reader=trn_reader,
        event_handler=event_handler,
        num_passes=10000,
        reader_dict=reader_dict)
|
||||||
|
|
||||||
|
|
||||||
|
# Run training only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
@ -0,0 +1,247 @@
|
|||||||
|
import sys
|
||||||
|
from os.path import join as join_path
|
||||||
|
import paddle.trainer_config_helpers.attrs as attrs
|
||||||
|
from paddle.trainer_config_helpers.poolings import MaxPooling
|
||||||
|
import paddle.v2.layer as layer
|
||||||
|
import paddle.v2.activation as activation
|
||||||
|
import paddle.v2.data_type as data_type
|
||||||
|
import paddle.v2.dataset.imdb as imdb
|
||||||
|
import paddle.v2 as paddle
|
||||||
|
|
||||||
|
|
||||||
|
def sequence_conv_pool(input,
                       input_size,
                       context_len,
                       hidden_size,
                       name=None,
                       context_start=None,
                       pool_type=None,
                       context_proj_layer_name=None,
                       context_proj_param_attr=False,
                       fc_layer_name=None,
                       fc_param_attr=None,
                       fc_bias_attr=None,
                       fc_act=None,
                       pool_bias_attr=None,
                       fc_attr=None,
                       context_attr=None,
                       pool_attr=None):
    """
    Text convolution pooling layers helper.

    Text input => Context Projection => FC Layer => Pooling => Output.

    :param name: name of output layer(pooling layer name)
    :type name: basestring
    :param input: name of input layer
    :type input: LayerOutput
    :param input_size: dimension of the input layer.
    :type input_size: int
    :param context_len: context projection length. See
                        context_projection's document.
    :type context_len: int
    :param hidden_size: FC Layer size.
    :type hidden_size: int
    :param context_start: context projection length. See
                          context_projection's context_start.
    :type context_start: int or None
    :param pool_type: pooling layer type. See pooling_layer's document.
    :type pool_type: BasePoolingType.
    :param context_proj_layer_name: context projection layer name.
                                    None if user don't care.
    :type context_proj_layer_name: basestring
    :param context_proj_param_attr: context projection parameter attribute.
                                    None if user don't care.
    :type context_proj_param_attr: ParameterAttribute or None.
    :param fc_layer_name: fc layer name. None if user don't care.
    :type fc_layer_name: basestring
    :param fc_param_attr: fc layer parameter attribute. None if user don't care.
    :type fc_param_attr: ParameterAttribute or None
    :param fc_bias_attr: fc bias parameter attribute. False if no bias,
                         None if user don't care.
    :type fc_bias_attr: ParameterAttribute or None
    :param fc_act: fc layer activation type. None means tanh
    :type fc_act: BaseActivation
    :param pool_bias_attr: pooling layer bias attr. None if don't care.
                           False if no bias.
    :type pool_bias_attr: ParameterAttribute or None.
    :param fc_attr: fc layer extra attribute.
    :type fc_attr: ExtraLayerAttribute
    :param context_attr: context projection layer extra attribute.
    :type context_attr: ExtraLayerAttribute
    :param pool_attr: pooling layer extra attribute.
    :type pool_attr: ExtraLayerAttribute
    :return: output layer name.
    :rtype: LayerOutput
    """
    # Set Default Value to param
    context_proj_layer_name = "%s_conv_proj" % name \
        if context_proj_layer_name is None else context_proj_layer_name

    # Linear context projection widens each step to input_size * context_len.
    with layer.mixed(
            name=context_proj_layer_name,
            size=input_size * context_len,
            act=activation.Linear(),
            layer_attr=context_attr) as m:
        m += layer.context_projection(
            input=input,
            context_len=context_len,
            context_start=context_start,
            padding_attr=context_proj_param_attr)

    fc_layer_name = "%s_conv_fc" % name \
        if fc_layer_name is None else fc_layer_name
    # The FC over the projected context acts as the convolution filter bank.
    fl = layer.fc(name=fc_layer_name,
                  input=m,
                  size=hidden_size,
                  act=fc_act,
                  layer_attr=fc_attr,
                  param_attr=fc_param_attr,
                  bias_attr=fc_bias_attr)

    # Pool over the sequence dimension to a fixed-size output.
    return layer.pooling(
        name=name,
        input=fl,
        pooling_type=pool_type,
        bias_attr=pool_bias_attr,
        layer_attr=pool_attr)
|
||||||
|
|
||||||
|
|
||||||
|
def convolution_net(input_dim,
                    class_dim=2,
                    emb_dim=128,
                    hid_dim=128,
                    is_predict=False):
    """Sequence CNN for sentiment classification.

    Word ids are embedded, fed through two conv-pool columns with context
    windows of 3 and 4, and classified with a softmax output layer.

    input_dim: word dictionary dimension.
    class_dim: number of categories.
    emb_dim: dimension of word embedding.
    hid_dim: dimension of each conv-pool hidden layer.
    is_predict: is predicting or not.
                NOTE(review): currently unused in this function.
    """
    data = layer.data("word", data_type.integer_value_sequence(input_dim))
    emb = layer.embedding(input=data, size=emb_dim)
    conv_3 = sequence_conv_pool(
        input=emb, input_size=emb_dim, context_len=3, hidden_size=hid_dim)
    conv_4 = sequence_conv_pool(
        input=emb, input_size=emb_dim, context_len=4, hidden_size=hid_dim)
    output = layer.fc(input=[conv_3, conv_4],
                      size=class_dim,
                      act=activation.Softmax())
    # BUG FIX: the label cardinality must follow class_dim rather than a
    # hard-coded 2, so non-binary classification works (default unchanged).
    lbl = layer.data("label", data_type.integer_value(class_dim))
    cost = layer.classification_cost(input=output, label=lbl)
    return cost
|
||||||
|
|
||||||
|
|
||||||
|
def stacked_lstm_net(input_dim,
                     class_dim=2,
                     emb_dim=128,
                     hid_dim=512,
                     stacked_num=3,
                     is_predict=False):
    """
    A Wrapper for sentiment classification task.
    This network uses bi-directional recurrent network,
    consisting three LSTM layers. This configure is referred to
    the paper as following url, but use fewer layers.
        http://www.aclweb.org/anthology/P15-1109

    input_dim: here is word dictionary dimension.
    class_dim: number of categories.
    emb_dim: dimension of word embedding.
    hid_dim: dimension of hidden layer.
    stacked_num: number of stacked lstm-hidden layer.
    is_predict: is predicting or not.
                Some layers is not needed in network when predicting.
                NOTE(review): currently unused in this function.
    """
    # An odd depth keeps the forward/backward direction alternation balanced.
    assert stacked_num % 2 == 1

    layer_attr = attrs.ExtraLayerAttribute(drop_rate=0.5)
    fc_para_attr = attrs.ParameterAttribute(learning_rate=1e-3)
    lstm_para_attr = attrs.ParameterAttribute(initial_std=0., learning_rate=1.)
    para_attr = [fc_para_attr, lstm_para_attr]
    bias_attr = attrs.ParameterAttribute(initial_std=0., l2_rate=0.)
    relu = activation.Relu()
    linear = activation.Linear()

    data = layer.data("word", data_type.integer_value_sequence(input_dim))
    emb = layer.embedding(input=data, size=emb_dim)

    fc1 = layer.fc(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
    lstm1 = layer.lstmemory(
        input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)

    # Stack (fc, lstm) pairs; even levels run the LSTM in reverse.
    inputs = [fc1, lstm1]
    for i in range(2, stacked_num + 1):
        fc = layer.fc(input=inputs,
                      size=hid_dim,
                      act=linear,
                      param_attr=para_attr,
                      bias_attr=bias_attr)
        lstm = layer.lstmemory(
            input=fc,
            reverse=(i % 2) == 0,
            act=relu,
            bias_attr=bias_attr,
            layer_attr=layer_attr)
        inputs = [fc, lstm]

    # Max-pool both top outputs over time before the classifier.
    fc_last = layer.pooling(input=inputs[0], pooling_type=MaxPooling())
    lstm_last = layer.pooling(input=inputs[1], pooling_type=MaxPooling())
    output = layer.fc(input=[fc_last, lstm_last],
                      size=class_dim,
                      act=activation.Softmax(),
                      bias_attr=bias_attr,
                      param_attr=para_attr)

    # BUG FIX: the label cardinality must follow class_dim rather than a
    # hard-coded 2, matching the size of the output layer (default unchanged).
    lbl = layer.data("label", data_type.integer_value(class_dim))
    cost = layer.classification_cost(input=output, label=lbl)
    return cost
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: train an IMDB sentiment classifier end to end.
if __name__ == '__main__':
    # init
    paddle.init(use_gpu=True, trainer_count=4)

    # network config
    print 'load dictionary...'
    word_dict = imdb.word_dict()
    dict_dim = len(word_dict)
    class_dim = 2

    # Please choose the way to build the network
    # by uncommenting the corresponding line.
    cost = convolution_net(dict_dim, class_dim=class_dim)
    # cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=2e-3,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(average_window=0.5))

    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            # Full report every 100 batches; otherwise a progress dot.
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            # Evaluate on the IMDB test set after every pass.
            result = trainer.test(
                reader=paddle.reader.batched(
                    lambda: imdb.test(word_dict), batch_size=128),
                reader_dict={'word': 0,
                             'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    # create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=adam_optimizer)

    trainer.train(
        reader=paddle.reader.batched(
            paddle.reader.shuffle(
                lambda: imdb.train(word_dict), buf_size=1000),
            batch_size=100),
        event_handler=event_handler,
        reader_dict={'word': 0,
                     'label': 1},
        num_passes=10)
|
@ -1,4 +0,0 @@
|
|||||||
#!/bin/bash
# Install build prerequisites on macOS via Homebrew.
brew update
# Add the homebrew/science tap (historically hosted scientific formulae).
brew tap homebrew/science
brew install openblas swig md5sha1sum
|
|
Loading…
Reference in new issue