Merge pull request #5345 from luotao1/ProtoDataProvider

Remove unused ProtoDataProvider-related code
8 years ago · ba86885456
parent 3375e3e27b e01b09410d
commit ba86885456
26 changed files with 220 additions and 2770 deletions
--- a/paddle/gserver/CMakeLists.txt
+++ b/paddle/gserver/CMakeLists.txt
@ -73,7 +73,6 @@ if(MOBILE_INFERENCE)
    list(REMOVE_ITEM GSERVER_SOURCES
         dataproviders/DataProvider.cpp
         dataproviders/MultiDataProvider.cpp
         dataproviders/ProtoDataProvider.cpp
         dataproviders/PyDataProvider2.cpp
         dataproviders/PyDataProvider.cpp)
--- a/paddle/gserver/dataproviders/DataProvider.cpp
+++ b/paddle/gserver/dataproviders/DataProvider.cpp
@ -16,8 +16,8 @@ limitations under the License. */
 #include <unistd.h>
 #include <algorithm>
 #include "ProtoDataProvider.h"
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
 #include "paddle/utils/StringUtil.h"
 #include "paddle/utils/Util.h"
@ -164,8 +164,6 @@ DataProvider* DataProvider::create(const DataConfig& config,
 REGISTER_DATA_PROVIDER(simple, SimpleDataProvider);
 REGISTER_DATA_PROVIDER(dummy, DummyDataProvider);
 REGISTER_DATA_PROVIDER(proto, ProtoDataProvider);
 REGISTER_DATA_PROVIDER(proto_sequence, ProtoSequenceDataProvider);
 int64_t DataProvider::getNextBatch(int64_t size, DataBatch* batch) {
  int64_t batchSize = doubleBuffer_ ? getNextBatchFromBuffer(size, batch)
--- a/paddle/gserver/dataproviders/ProtoDataProvider.cpp
+++ b/paddle/gserver/dataproviders/ProtoDataProvider.cpp
--- a/paddle/gserver/dataproviders/ProtoDataProvider.h
+++ b/paddle/gserver/dataproviders/ProtoDataProvider.h
@ -1,179 +0,0 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include <vector>
 #include "DataFormat.pb.h"
 #include "paddle/utils/Stat.h"
 #include "DataProvider.h"
 #include "ProtoReader.h"
 namespace paddle {
 /**
 * @brief Provides data from protobuf data files, with each sample
 * specified by a proto message.
 *
 * DataSample is defined in DataFormat.proto.
 *
 * The file format is
 *
 *    header
 *
 *    sample1
 *
 *    sample2
 *
 *    ...
 *
 *    sampleN
 *
 * @note In the data file, each message is prefixed with its length.
 * Reading and writing of the protobuf messages is implemented in
 * ProtoReader.h.
 */
 class ProtoDataProvider : public DataProvider {
 public:
  /**
   * @param[in] config       data configuration for this provider
   * @param[in] useGpu       NOTE(review): presumably selects between
   *                         cpuBatch_ and gpuBatch_ -- confirm in the .cpp
   * @param[in] loadDataAll  defaults to true; NOTE(review): presumably
   *                         requests loading all data at construction --
   *                         confirm in the .cpp
   */
  ProtoDataProvider(const DataConfig& config,
                    bool useGpu,
                    bool loadDataAll = true);
  virtual void reset();
  /**
   * @brief Return sampleNums_, scaled by usageRatio_ when usageRatio_ is
   * below 1.0 (the product is truncated by the cast to int64_t).
   *
   * @note this size includes the sequences which are skipped because they
   * are longer than the batch size.
   */
  virtual int64_t getSize() {
    int64_t size = sampleNums_;
    // usageRatio_ is not declared in this class; presumably it is inherited
    // from DataProvider -- confirm.
    if (usageRatio_ < 1.0f) {
      size = static_cast<int64_t>(size * usageRatio_);
    }
    return size;
  }
  virtual void shuffle();
  /**
   * @brief load protobuf data from the given files
   * @param[in]  fileList  NOTE(review): presumably the names of the data
   * files themselves (contrast with the protected overload, whose argument
   * names a file that lists them) -- confirm in the .cpp
   */
  void loadData(const std::vector<std::string>& fileList);
  virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
 protected:
  /**
   * @brief load protobuf data from a list of files
   * @param[in]  fileName  file name of a file which contains
   * a list of file names
   */
  void loadData(const std::string& fileName);
  /**
   * @brief load protobuf data from file
   * @param[in]  fileName   data file name
   */
  void loadDataFile(const std::string& fileName);
  /** @brief check data header of each data sample
   *  @param[in] header     data header read from protobuf data
   */
  void checkDataHeader(const DataHeader& header);
  /**
   * @brief fill protobuf data into slots_,
   * slots_ is a vector of ProtoSlot in memory.
   * @param[in]  sample     data sample read from protobuf data
   */
  void fillSlots(const DataSample& sample);
  /**
   * @brief return true if each sample is one sequence, i.e., independent
   * of other samples. This is the case exactly when
   * sequenceStartPositions_ is empty.
   */
  inline bool iidData() const { return sequenceStartPositions_.empty(); }
  /**
   * @brief check that sample is consistent with header_
   */
  void checkSample(const DataSample& sample);
  // NOTE(review): the three loop templates below are only declared here;
  // presumably they apply op over sequences, samples and the sub-samples of
  // one slot respectively -- confirm against ProtoDataProvider.cpp.
  template <class Op>
  int64_t sequenceLoop(Op op, int64_t size);
  template <class Op>
  int64_t sampleLoop(Op op, int64_t size);
  template <class Op>
  int64_t subSampleLoop(Op op, int64_t size, int slot);
  void showDataStats();
 protected:
  // Storage for one variable-dimension dense entry: values plus dimensions.
  struct ProtoVarSlot {
    std::vector<real> data;
    std::vector<int> dims;
  };
  // In-memory storage for one slot; one container per supported data kind.
  // NOTE(review): presumably only the container matching `type` is filled --
  // confirm in fillSlots().
  struct ProtoSlot {
    SlotDef::SlotType type;
    int dim;
    std::vector<int> indexData;
    std::vector<real> denseData;
    std::vector<sparse_non_value_t> sparseNonValueData;
    std::vector<sparse_float_value_t> sparseFloatValueData;
    std::vector<int64_t> indices;
    std::vector<int64_t> subIndices;
    std::vector<ProtoVarSlot> varDenseData;
    std::vector<std::vector<int>> varIndices;
    std::vector<std::string> strData;
  };
  DataHeader header_;
  int numVecSlots_;
  std::vector<ProtoSlot> slots_;
  size_t sampleNums_;
  /**
   * The starting position of each sequence in samples.
   * The last element should be num of samples.
   * If empty, each sample is one sequence.
   */
  std::vector<size_t> sequenceStartPositions_;
  int64_t currentSequenceIndex_;
  // The size should be the number of sequences.
  std::vector<size_t> shuffledSequenceIds_;
  ThreadLocalD<DataBatch> cpuBatch_;
  ThreadLocalD<DataBatch> gpuBatch_;
  RWLock lock_;
  std::vector<StatPtr> nnzStats_;  // stats for the number of non-zero entries
 };
 /**
 * @brief Special-purpose provider for Proto data: instances should contain
 * sparse-non-value slots and a label.
 *
 * @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
 */
 class ProtoSequenceDataProvider : public ProtoDataProvider {
 public:
  // Same parameter list and default as the ProtoDataProvider constructor.
  ProtoSequenceDataProvider(const DataConfig& config,
                            bool useGpu,
                            bool loadDataAll = true);
  ~ProtoSequenceDataProvider() {}
  // Redeclares the batch-fetching virtual of ProtoDataProvider; the
  // sequence-specific behavior is implemented in ProtoDataProvider.cpp.
  virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
 };
 }  // namespace paddle
--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
@ -62,17 +62,6 @@ if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE)
 endif()
 if(NOT MOBILE_INFERENCE)
 ################### test_ProtoDataProvider ############
    add_unittest_without_exec(test_ProtoDataProvider
        test_ProtoDataProvider.cpp)
    # test_ProtoDataProvider creates a directory with the same name as the
    # test, so if WORKING_DIRECTORY were left at its default directory,
    # the mkdir call would fail.
    add_test(NAME test_ProtoDataProvider
        COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider
        WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
 ################## test_Evaluator #######################
    add_unittest(test_Evaluator
        test_Evaluator.cpp)
@ -110,3 +99,24 @@ add_test(NAME test_PyDataProvider2
   COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/paddle/gserver/tests:${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2
        WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle
 )
 ################# test_CompareSparse ##################
 add_unittest_without_exec(test_CompareSparse
    test_CompareSparse.cpp)
 if(NOT ON_TRAVIS)
  add_test(NAME test_CompareSparse
    COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d
          ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests
              ./.set_port.sh -p port -n 6
                  ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse
    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 endif()
 ################ test_CompareTwoNets ######################
 add_unittest_without_exec(test_CompareTwoNets
    test_CompareTwoNets.cpp)
 add_test(NAME test_CompareTwoNets
  COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d
        ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests
        ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets
    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
--- a/paddle/gserver/tests/proto_files.txt
+++ b/paddle/gserver/tests/proto_files.txt
@ -1,2 +0,0 @@
 ./test_ProtoDataProvider/data1.bin
 ./test_ProtoDataProvider/data2.bin
--- a/paddle/gserver/tests/proto_files_compressed.txt
+++ b/paddle/gserver/tests/proto_files_compressed.txt
@ -1,2 +0,0 @@
 ./test_ProtoDataProvider/data1.bin.gz
 ./test_ProtoDataProvider/data2.bin.gz
--- a/paddle/gserver/tests/sequence_lstm.conf
+++ b/paddle/gserver/tests/sequence_lstm.conf
@ -0,0 +1,64 @@
 #!/usr/bin/env python
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 ######################## data source ################################
 dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'
 dict_file = dict()
 for line_count, line in enumerate(open(dict_path, "r")):
    dict_file[line.strip()] = line_count
 define_py_data_sources2(
    train_list='gserver/tests/Sequence/train.list',
    test_list=None,
    module='sequenceGen',
    obj='process',
    args={"dict_file": dict_file})
 settings(batch_size=5)
 ######################## network configure ################################
 dict_dim = len(open(dict_path, 'r').readlines())
 word_dim = 128
 hidden_dim = 256
 label_dim = 3
 sparse_update = get_config_arg("sparse_update", bool, False)
 data = data_layer(name="word", size=dict_dim)
 emb = embedding_layer(
    input=data,
    size=word_dim,
    param_attr=ParamAttr(sparse_update=sparse_update))
 with mixed_layer(size=hidden_dim * 4) as lstm_input:
    lstm_input += full_matrix_projection(input=emb)
 lstm = lstmemory(
    input=lstm_input,
    act=TanhActivation(),
    gate_act=SigmoidActivation(),
    state_act=TanhActivation())
 lstm_last = last_seq(input=lstm)
 with mixed_layer(
        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
    output += full_matrix_projection(input=lstm_last)
 outputs(
    classification_cost(
        input=output, label=data_layer(
            name="label", size=1)))
--- a/paddle/gserver/tests/sequence_recurrent.py
+++ b/paddle/gserver/tests/sequence_recurrent.py
@ -0,0 +1,56 @@
 #!/usr/bin/env python
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 ######################## data source ################################
 dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'
 dict_file = dict()
 for line_count, line in enumerate(open(dict_path, "r")):
    dict_file[line.strip()] = line_count
 define_py_data_sources2(
    train_list='gserver/tests/Sequence/train.list',
    test_list=None,
    module='sequenceGen',
    obj='process',
    args={"dict_file": dict_file})
 settings(batch_size=5)
 ######################## network configure ################################
 dict_dim = len(open(dict_path, 'r').readlines())
 word_dim = 128
 hidden_dim = 128
 label_dim = 3
 # This config is designed to be equivalent with sequence_recurrent_group.py
 data = data_layer(name="word", size=dict_dim)
 emb = embedding_layer(
    input=data, size=word_dim, param_attr=ParamAttr(name="emb"))
 recurrent = recurrent_layer(input=emb, bias_attr=False, act=SoftmaxActivation())
 recurrent_last = last_seq(input=recurrent)
 with mixed_layer(
        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
    output += full_matrix_projection(input=recurrent_last)
 outputs(
    classification_cost(
        input=output, label=data_layer(
            name="label", size=1)))
--- a/paddle/gserver/tests/sequence_recurrent_group.py
+++ b/paddle/gserver/tests/sequence_recurrent_group.py
@ -0,0 +1,70 @@
 #!/usr/bin/env python
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 ######################## data source ################################
 dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'
 dict_file = dict()
 for line_count, line in enumerate(open(dict_path, "r")):
    dict_file[line.strip()] = line_count
 define_py_data_sources2(
    train_list='gserver/tests/Sequence/train.list',
    test_list=None,
    module='sequenceGen',
    obj='process',
    args={"dict_file": dict_file})
 settings(batch_size=5)
 ######################## network configure ################################
 dict_dim = len(open(dict_path, 'r').readlines())
 word_dim = 128
 hidden_dim = 128
 label_dim = 3
 # This config is designed to be equivalent with sequence_recurrent.py
 data = data_layer(name="word", size=dict_dim)
 emb = embedding_layer(
    input=data, size=word_dim, param_attr=ParamAttr(name="emb"))
 def step(y):
    """Step function passed to recurrent_group below.

    Builds a mixed layer named "rnn_state" (size hidden_dim, no bias,
    softmax activation) that sums an identity projection of the step input
    `y` and a full-matrix projection of `mem`, the memory that is also named
    "rnn_state". Returns that mixed layer.
    """
    # The memory and the mixed layer below use the same name, "rnn_state".
    mem = memory(name="rnn_state", size=hidden_dim)
    with mixed_layer(
            name="rnn_state",
            size=hidden_dim,
            bias_attr=False,
            act=SoftmaxActivation()) as out:
        out += identity_projection(input=y)
        # NOTE(review): the parameter name presumably matches the name that
        # recurrent_layer auto-generates in sequence_recurrent.py, so both
        # configs use the same recurrent weight -- confirm.
        out += full_matrix_projection(
            input=mem, param_attr=ParamAttr(name="___recurrent_layer_0__"))
    return out
 recurrent = recurrent_group(name="rnn", step=step, input=emb)
 recurrent_last = last_seq(input=recurrent)
 with mixed_layer(
        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
    output += full_matrix_projection(input=recurrent_last)
 outputs(
    classification_cost(
        input=output, label=data_layer(
            name="label", size=1)))
--- a/paddle/gserver/tests/test_CompareSparse.cpp
+++ b/paddle/gserver/tests/test_CompareSparse.cpp
@ -22,8 +22,7 @@ limitations under the License. */
 using namespace paddle;  // NOLINT
 using namespace std;     // NOLINT
-static const string& configFile1 =
+static const string& configFile1 = "gserver/tests/sequence_lstm.conf";
    "trainer/tests/sample_trainer_config_compare_sparse.conf";
 DECLARE_bool(use_gpu);
 DECLARE_string(config);
--- a/paddle/gserver/tests/test_CompareTwoNets.cpp
+++ b/paddle/gserver/tests/test_CompareTwoNets.cpp
@ -30,8 +30,6 @@ DECLARE_bool(use_gpu);
 DECLARE_string(config);
 DECLARE_string(nics);
 DEFINE_string(config_file_a, "", "config of one network to compare");
 DEFINE_string(config_file_b, "", "config of another network to compare");
 DEFINE_bool(need_high_accuracy,
            false,
            "whether need to run in double accuracy");
@ -42,6 +40,10 @@ DEFINE_double(
 DECLARE_bool(thread_local_rand_use_global_seed);
 DECLARE_int32(seed);
 static const string& config_file_a = "gserver/tests/sequence_recurrent.py";
 static const string& config_file_b =
    "gserver/tests/sequence_recurrent_group.py";
 struct ComData {
  vector<Argument> outArgs;
  vector<ParameterPtr> parameters;
@ -66,6 +68,7 @@ void calcGradient(ComData& data, const string configFile) {
  DataBatch dataBatch;
  int32_t batchSize = trainer.getConfig().opt_config().batch_size();
  trainer.getDataProvider()->reset();
  trainer.getDataProvider()->setSkipShuffle();
  trainer.getDataProvider()->getNextBatch(batchSize, &dataBatch);
@ -167,11 +170,11 @@ void compareGradient(ComData& comDataA, ComData& comDataB) {
 TEST(Trainer, create) {
  ComData dataA;
-  calcGradient(dataA, FLAGS_config_file_a);
+  calcGradient(dataA, config_file_a);
  LOG(INFO) << "\n\nforwardBackward of Network A is finished\n\n";
  ComData dataB;
-  calcGradient(dataB, FLAGS_config_file_b);
+  calcGradient(dataB, config_file_b);
  LOG(INFO) << "\n\nforwardBackward of the Network B is finished\n\n";
  compareGradient(dataA, dataB);
--- a/paddle/gserver/tests/test_ProtoDataProvider.cpp
+++ b/paddle/gserver/tests/test_ProtoDataProvider.cpp
--- a/paddle/trainer/tests/CMakeLists.txt
+++ b/paddle/trainer/tests/CMakeLists.txt
@ -28,35 +28,7 @@ if(WITH_PYTHON)
          ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass
      WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 endif()
 ################ test_CompareTwoNets ######################
 add_unittest_without_exec(test_CompareTwoNets
    test_CompareTwoNets.cpp)
 add_test(NAME test_CompareTwoNets
  COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
        ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets
            --config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf
    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 ############### test_CompareTwoOpts ###################
 add_unittest_without_exec(test_CompareTwoOpts
    test_CompareTwoOpts.cpp)
 add_test(NAME test_CompareTwoOpts
  COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
        ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts
            --config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf
            --num_passes=1 --need_high_accuracy=0
    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 ################# test_CompareSparse ##################
 add_unittest_without_exec(test_CompareSparse
    test_CompareSparse.cpp)
 if(NOT ON_TRAVIS)
  add_test(NAME test_CompareSparse
    COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
              ./.set_port.sh -p port -n 6
                  ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse
    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 endif()
 ################# test_recurrent_machine_generation ###############
 add_unittest_without_exec(test_recurrent_machine_generation
    test_recurrent_machine_generation.cpp)
--- a/paddle/trainer/tests/mnist.list
+++ b/paddle/trainer/tests/mnist.list
@ -1 +0,0 @@
 trainer/tests/mnist_bin_part
--- a/paddle/trainer/tests/mnist_bin_part
+++ b/paddle/trainer/tests/mnist_bin_part
--- a/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data
+++ b/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data
--- a/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.protolist
+++ b/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.protolist
@ -1 +0,0 @@
 ./trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data
--- a/paddle/trainer/tests/sample_trainer_config_compare_sparse.conf
+++ b/paddle/trainer/tests/sample_trainer_config_compare_sparse.conf
@ -1,154 +0,0 @@
 #edit-mode: -*- python -*-
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # TODO(luotao02): This config is only used for unit tests. It is out of date and will be updated later.
 # Note: when making changes to this file, please make sure
 # sample_trainer_config_rnn.conf is changed accordingly so that the unit test
 # for comparing these two nets can pass (test_CompareTwoNets)
 default_initial_std(0.1)
 default_device(0)
 word_dim = 999
 l1 = 0
 l2 = 0
 model_type("nn")
 sparse_update = get_config_arg("sparse_update", bool, False)
 TrainData(ProtoData(        
            type = "proto_sequence",
            files = ('trainer/tests/train_sparse.list'), 
            ))
 Settings(
    algorithm='sgd',
    batch_size=100,
    learning_rate=0.0001,
    learning_rate_decay_a=4e-08,
    learning_rate_decay_b=0.0,
    learning_rate_schedule='poly',
 )
 wordvec_dim = 32
 layer2_dim = 16
 layer3_dim = 16
 hidden_dim = 32
 slot_names = ["qb", "qw", "tb", "tw"]
 def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one branch of the network; layer names end in network_name.

    For every slot in slot_names this declares an input, a data layer, an
    embedding ("mixed") layer, a recurrent layer and a "seqlastins" layer.
    The per-slot outputs then feed the fc layers "layer2_", "layer3_" and
    "output_" (size 1, no bias).

    All parameter names are fixed strings that do not include network_name,
    so every call uses the same parameter names.
    NOTE(review): presumably this makes the "left" and "right" branches
    share weights -- confirm against the config parser.

    hidden_dim is accepted but never referenced in the body.
    sparse_update is read from module scope, not passed as an argument.
    """
    slotnum = len(slot_names)
    # Declare all inputs first, then build the per-slot layers.
    for i in xrange(slotnum):
        Inputs(slot_names[i] + network_name)
    for i in xrange(slotnum):
        # Data layer for this slot.
        Layer(
            name = slot_names[i] + network_name,
            type = "data",
            size = word_dim,
            device = -1,
        )
        # Embedding: table projection using the parameter "embedding.w0".
        Layer(
            name = slot_names[i] + "_embedding_" + network_name,
            type = "mixed",
            size = wordvec_dim,
            bias = False,
            device = -1,
            inputs = TableProjection(slot_names[i] + network_name,
                                     parameter_name = "embedding.w0",
                                     decay_rate_l1=l1,
                                     sparse_remote_update = True,
                                     sparse_update = sparse_update,
                                     ),
        )
        # Recurrent layer (tanh) using "rnn1.w0" and "rnn1.bias".
        Layer(
            name = slot_names[i] + "_rnn1_" + network_name,
            type = "recurrent",
            active_type = "tanh",
            bias = Bias(initial_std = 0,
                        parameter_name = "rnn1.bias"),
            inputs = Input(slot_names[i] + "_embedding_" + network_name,
                           parameter_name = "rnn1.w0")
        )
        # "seqlastins" layer over the recurrent layer's output.
        Layer(
            name = slot_names[i] + "_rnnlast_" + network_name,
            type = "seqlastins",
            inputs = [
                slot_names[i] + "_rnn1_" + network_name,
            ],
        )
    # Fully connected layer over the "_rnnlast_" outputs of all slots.
    Layer(
        name = "layer2_" + network_name,
        type = "fc",
        active_type = "tanh",
        size = layer2_dim,
        bias = Bias(parameter_name = "layer2.bias"),
        inputs = [Input(slot_name + "_rnnlast_" + network_name, 
                        parameter_name = "_layer2_" + slot_name + ".w", 
                        decay_rate = l2, 
                        initial_smart = True) for slot_name in slot_names]
    )
    Layer(
        name = "layer3_" + network_name,
        type = "fc",
        active_type = "tanh",
        size = layer3_dim,
        bias = Bias(parameter_name = "layer3.bias"),
        inputs = [
            Input("layer2_" + network_name, 
                  parameter_name = "_layer3.w", 
                  decay_rate = l2, 
                  initial_smart = True),
        ]
    )
    # Output of this branch: size 1, no bias; used by the "cost" layer below.
    Layer(
        name = "output_" + network_name,
        type = "fc",
        size = 1,
        bias = False,
        inputs = [
                  Input("layer3_" + network_name,
                       parameter_name = "_layerO.w"),
                 ],
        )
 ltr_network("left")
 ltr_network("right")
 Inputs("label")
 Layer(
    name = "label",
    type = "data",
    size = 1,
    )
 Outputs("cost", "qb_rnnlast_left")
 Layer(
    name = "cost",
    type = "rank-cost",
    inputs = ["output_left", "output_right", "label"],
    )
--- a/paddle/trainer/tests/sample_trainer_config_opt_a.conf
+++ b/paddle/trainer/tests/sample_trainer_config_opt_a.conf
@ -1,40 +0,0 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 ################################### Data Configuration ###################################
 TrainData(ProtoData(files = "trainer/tests/mnist.list"))
 ################################### Algorithm Configuration ###################################
 settings(batch_size = 1000,
         learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
 ################################### Network Configuration ###################################
 data = data_layer(name ="input", size=784)
 fc1 = fc_layer(input=data, size=800,
               bias_attr=True,
               act=SigmoidActivation())
 fc2 = fc_layer(input=fc1, size=800,
               bias_attr=True,
               act=SigmoidActivation())
 output = fc_layer(input=[fc1, fc2], size=10,
                  bias_attr=True,
                  act=SoftmaxActivation())
 lbl = data_layer(name ="label", size=1)
 cost = classification_cost(input=output, label=lbl)
 outputs(cost)
--- a/paddle/trainer/tests/sample_trainer_config_opt_b.conf
+++ b/paddle/trainer/tests/sample_trainer_config_opt_b.conf
@ -1,40 +0,0 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 ################################### Data Configuration ###################################
 TrainData(ProtoData(files = "trainer/tests/mnist.list"))
 ################################### Algorithm Configuration ###################################
 settings(batch_size = 1000,
         learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
 ################################### Network Configuration ###################################
 data = data_layer(name ="input", size=784)
 fc1 = fc_layer(input=data, size=800,
               bias_attr=True,
               act=SigmoidActivation())
 fc2 = fc_layer(input=fc1, size=800,
               bias_attr=True,
               act=SigmoidActivation())
 output = fc_layer(input=[fc1, fc2], size=10,
                  bias_attr=True,
                  act=SoftmaxActivation())
 lbl = data_layer(name ="label", size=1)
 cost = classification_cost(input=output, label=lbl)
 outputs(cost)
--- a/paddle/trainer/tests/sample_trainer_config_qb_rnn.conf
+++ b/paddle/trainer/tests/sample_trainer_config_qb_rnn.conf
@ -1,154 +0,0 @@
 #edit-mode: -*- python -*-
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # TODO(luotao02): This config is only used for unit tests. It is out of date and will be updated later.
 # Note: when making changes to this file, please make sure
 # sample_trainer_config_rnn.conf is changed accordingly so that the unit test
 # for comparing these two nets can pass (test_CompareTwoNets)
 default_initial_std(0.1)
 default_device(0)
 word_dim = 1451594
 l1 = 0
 l2 = 0
 model_type("nn")
 sparse_update = get_config_arg("sparse_update", bool, False)
 TrainData(ProtoData(        
            type = "proto_sequence",
            files = ('trainer/tests/train.list'), 
            ))
 Settings(
    algorithm='sgd',
    batch_size=100,
    learning_rate=0.0001,
    learning_rate_decay_a=4e-08,
    learning_rate_decay_b=0.0,
    learning_rate_schedule='poly',
 )
 wordvec_dim = 128
 layer2_dim = 96
 layer3_dim = 96
 hidden_dim = 128
 slot_names = ["qb", "qw", "tb", "tw"]
 def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one branch of the network; layer names end in network_name.

    For every slot in slot_names this declares an input, a data layer, an
    embedding ("mixed") layer, a recurrent layer and a "seqlastins" layer.
    The per-slot outputs then feed the fc layers "layer2_", "layer3_" and
    "output_" (size 1, no bias).

    All parameter names are fixed strings that do not include network_name,
    so every call uses the same parameter names.
    NOTE(review): presumably this makes the "left" and "right" branches
    share weights -- confirm against the config parser.

    hidden_dim is accepted but never referenced in the body.
    sparse_update is read from module scope, not passed as an argument.
    """
    slotnum = len(slot_names)
    # Declare all inputs first, then build the per-slot layers.
    for i in xrange(slotnum):
        Inputs(slot_names[i] + network_name)
    for i in xrange(slotnum):
        # Data layer for this slot.
        Layer(
            name = slot_names[i] + network_name,
            type = "data",
            size = word_dim,
            device = -1,
        )
        # Embedding: table projection using the parameter "embedding.w0".
        Layer(
            name = slot_names[i] + "_embedding_" + network_name,
            type = "mixed",
            size = wordvec_dim,
            bias = False,
            device = -1,
            inputs = TableProjection(slot_names[i] + network_name,
                                     parameter_name = "embedding.w0",
                                     decay_rate_l1=l1,
                                     sparse_remote_update = True,
                                     sparse_update = sparse_update,
                                     ),
        )
        # Recurrent layer (tanh) using "rnn1.w0" and "rnn1.bias".
        Layer(
            name = slot_names[i] + "_rnn1_" + network_name,
            type = "recurrent",
            active_type = "tanh",
            bias = Bias(initial_std = 0,
                        parameter_name = "rnn1.bias"),
            inputs = Input(slot_names[i] + "_embedding_" + network_name,
                           parameter_name = "rnn1.w0")
        )
        # "seqlastins" layer over the recurrent layer's output.
        Layer(
            name = slot_names[i] + "_rnnlast_" + network_name,
            type = "seqlastins",
            inputs = [
                slot_names[i] + "_rnn1_" + network_name,
            ],
        )
    # Fully connected layer over the "_rnnlast_" outputs of all slots.
    Layer(
        name = "layer2_" + network_name,
        type = "fc",
        active_type = "tanh",
        size = layer2_dim,
        bias = Bias(parameter_name = "layer2.bias"),
        inputs = [Input(slot_name + "_rnnlast_" + network_name, 
                        parameter_name = "_layer2_" + slot_name + ".w", 
                        decay_rate = l2, 
                        initial_smart = True) for slot_name in slot_names]
    )
    Layer(
        name = "layer3_" + network_name,
        type = "fc",
        active_type = "tanh",
        size = layer3_dim,
        bias = Bias(parameter_name = "layer3.bias"),
        inputs = [
            Input("layer2_" + network_name, 
                  parameter_name = "_layer3.w", 
                  decay_rate = l2, 
                  initial_smart = True),
        ]
    )
    # Output of this branch: size 1, no bias; used by the "cost" layer below.
    Layer(
        name = "output_" + network_name,
        type = "fc",
        size = 1,
        bias = False,
        inputs = [
                  Input("layer3_" + network_name,
                       parameter_name = "_layerO.w"),
                 ],
        )
 ltr_network("left")
 ltr_network("right")
 Inputs("label")
 Layer(
    name = "label",
    type = "data",
    size = 1,
    )
 Outputs("cost", "qb_rnnlast_left")
 Layer(
    name = "cost",
    type = "rank-cost",
    inputs = ["output_left", "output_right", "label"],
    )
--- a/paddle/trainer/tests/sample_trainer_config_rnn.conf
+++ b/paddle/trainer/tests/sample_trainer_config_rnn.conf
@ -1,180 +0,0 @@
 #edit-mode: -*- python -*-
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # TODO(luotao02): This config is only used for unit tests. It is out of date now and will be updated later.
 # Note: when making changes to this file, please make sure
 # sample_trainer_config_qb_rnn.conf is changed accordingly so that the unit test
 # for comparing these two nets can pass (test_CompareTwoNets).
 # Config-wide defaults (presumably the default parameter init std and the
 # default device id -- confirm against the config parser).
 default_initial_std(0.1)
 default_device(0)
 # Size of every per-slot "data" layer built by ltr_network.
 word_dim = 1451594
 # l1 is passed as decay_rate_l1 of the embedding TableProjection and l2 as
 # decay_rate of the fc-layer inputs; both are 0 in this config.
 l1 = 0
 l2 = 0
 model_type("recurrent_nn")
 # Read from the "sparse_update" config argument (bool, default False) and
 # forwarded to the embedding TableProjection in ltr_network.
 sparse_update = get_config_arg("sparse_update", bool, False)
 # NOTE: ('trainer/tests/train.list') is a parenthesized string, not a
 # one-element tuple (there is no trailing comma inside the parentheses).
 TrainData(ProtoData(
            type = "proto_sequence",
            files = ('trainer/tests/train.list'), 
            ))
 # Optimizer / learning-rate settings for the trainer.
 Settings(
    algorithm='sgd',
    batch_size=100,
    learning_rate=0.0001,
    learning_rate_decay_a=4e-08,
    learning_rate_decay_b=0.0,
    learning_rate_schedule='poly',
 )
 # Default layer sizes used by ltr_network: embedding layer, the two fc
 # layers, and the recurrent layer respectively.
 wordvec_dim = 128
 layer2_dim = 96
 layer3_dim = 96
 hidden_dim = 128
 # One data/embedding/rnn pipeline is built per slot name.
 slot_names = ["qb", "qw", "tb", "tw"]
 def SimpleRecurrentLayer(name,
                          size,
                          active_type,
                          bias,
                          input_layer_name,
                          parameter_name,
                          seq_reversed = False):
    """Declare a recurrent layer group wrapping a single "mixed" layer.

    The group is named ``name + "_layer_group"``; it takes
    ``input_layer_name`` as its only in-link and exposes ``name`` as its only
    out-link. Inside the group, the layer ``name`` sums an identity
    projection of the input with a full-matrix projection (weights
    ``parameter_name``) of a Memory declared with the same name and size
    (presumably the layer's own output from the previous step -- confirm
    against the config parser).

    Args:
        name: name of the mixed layer and of the memory.
        size: size of the mixed layer and of the memory.
        active_type: activation type passed to the mixed layer.
        bias: bias configuration passed to the mixed layer.
        input_layer_name: layer linked into the group.
        parameter_name: parameter name of the recurrent projection.
        seq_reversed: forwarded unchanged to RecurrentLayerGroupBegin.
    """
    group_name = name + "_layer_group"
    RecurrentLayerGroupBegin(
        group_name,
        in_links=[input_layer_name],
        out_links=[name],
        seq_reversed=seq_reversed)
    # The memory must be declared before the layer that consumes it.
    state = Memory(name=name, size=size)
    Layer(
        name = name,
        type = "mixed",
        size = size,
        active_type = active_type,
        bias = bias,
        inputs = [
            IdentityProjection(input_layer_name),
            FullMatrixProjection(state, parameter_name = parameter_name),
        ],
    )
    RecurrentLayerGroupEnd(group_name)
 def ltr_network(network_name,
                 word_dim=word_dim,
                 wordvec_dim=wordvec_dim,
                 layer2_dim=layer2_dim,
                 layer3_dim=layer3_dim,
                 hidden_dim=hidden_dim,
                 slot_names=slot_names,
                 l1=l1,
                 l2=l2):
    """Declare one ranking network whose layer names carry ``network_name``.

    For every slot this creates, in order: a "data" layer of size
    ``word_dim``, a "mixed" embedding layer of size ``wordvec_dim`` (table
    projection, parameter "embedding.w0"), a SimpleRecurrentLayer of size
    ``hidden_dim`` (parameters "rnn1.w0" / "rnn1.bias") and a "seqlastins"
    layer on top of it. The "seqlastins" layers of all slots feed the fc
    layer "layer2_<network_name>", followed by "layer3_<network_name>" and
    the size-1 fc layer "output_<network_name>".

    Parameter names do not contain ``network_name``, so two networks built
    with this function use identical parameter names (presumably to share
    weights -- confirm against the config parser).

    ``l1`` is used as ``decay_rate_l1`` of the embedding projection and
    ``l2`` as ``decay_rate`` of the fc-layer inputs. The module-level
    ``sparse_update`` value is forwarded to the embedding projection.
    """
    # All slot inputs are declared before any layer is created.
    for slot in slot_names:
        Inputs(slot + network_name)
    for slot in slot_names:
        data_layer = slot + network_name
        embedding_layer = slot + "_embedding_" + network_name
        rnn_layer = slot + "_rnn1_" + network_name
        Layer(
            name = data_layer,
            type = "data",
            size = word_dim,
            device = -1,
        )
        Layer(
            name = embedding_layer,
            type = "mixed",
            size = wordvec_dim,
            bias = False,
            device = -1,
            inputs = TableProjection(
                data_layer,
                parameter_name = "embedding.w0",
                decay_rate_l1 = l1,
                sparse_remote_update = True,
                sparse_update = sparse_update,
            ),
        )
        SimpleRecurrentLayer(
            name = rnn_layer,
            size = hidden_dim,
            active_type = "tanh",
            bias = Bias(initial_std = 0, parameter_name = "rnn1.bias"),
            input_layer_name = embedding_layer,
            parameter_name = "rnn1.w0",
        )
        Layer(
            name = slot + "_rnnlast_" + network_name,
            type = "seqlastins",
            inputs = [rnn_layer],
        )
    layer2_name = "layer2_" + network_name
    layer3_name = "layer3_" + network_name
    # One input per slot, each with its own weight matrix.
    Layer(
        name = layer2_name,
        type = "fc",
        active_type = "tanh",
        size = layer2_dim,
        bias = Bias(parameter_name = "layer2.bias"),
        inputs = [
            Input(slot + "_rnnlast_" + network_name,
                  parameter_name = "_layer2_" + slot + ".w",
                  decay_rate = l2,
                  initial_smart = True)
            for slot in slot_names
        ],
    )
    Layer(
        name = layer3_name,
        type = "fc",
        active_type = "tanh",
        size = layer3_dim,
        bias = Bias(parameter_name = "layer3.bias"),
        inputs = [
            Input(layer2_name,
                  parameter_name = "_layer3.w",
                  decay_rate = l2,
                  initial_smart = True),
        ],
    )
    Layer(
        name = "output_" + network_name,
        type = "fc",
        size = 1,
        bias = False,
        inputs = [
            Input(layer3_name, parameter_name = "_layerO.w"),
        ],
    )
 # Build the ranking network twice; the layers created by ltr_network carry
 # the given network name ("left" / "right") in their layer names.
 ltr_network("left")
 ltr_network("right")
 # Extra input besides the per-slot inputs: a size-1 data layer named "label".
 Inputs("label")
 Layer(
    name = "label",
    type = "data",
    size = 1,
    )
 # Exposed outputs: the cost layer and the "seqlastins" layer of the "qb"
 # slot in the left network.
 Outputs("cost", "qb_rnnlast_left")
 # "rank-cost" layer fed by the size-1 output layers of both networks and
 # by the label.
 Layer(
    name = "cost",
    type = "rank-cost",
    inputs = ["output_left", "output_right", "label"],
    )
--- a/paddle/trainer/tests/testPyDataWrapper.py
+++ b/paddle/trainer/tests/testPyDataWrapper.py
@ -20,28 +20,6 @@ import random
 import json
 import string
@provider(slots=[
    SparseNonValueSlot(10), DenseSlot(2), SparseValueSlot(10), StringSlot(1),
    IndexSlot(3)
 ])
 def processNonSequenceData(obj, filename):
    with open(filename, "rb") as f:
        for line in f:
            slots_str = line.split(';')
            index = int(slots_str[0])
            non_values = map(int, slots_str[1].split()[1:])
            dense = map(float, slots_str[2].split()[1:])
            strs = slots_str[4].strip().split(' ', 1)[1]
            def __values_mapper__(s):
                s = s.split(":")
                return int(s[0]), float(s[1])
            values = map(__values_mapper__, slots_str[3].split()[1:])
            yield [non_values, dense, values, strs, index]
 SPARSE_ID_LIMIT = 1000
 SPARSE_ID_COUNT = 100
 SEQUENCE_LIMIT = 50
@ -146,8 +124,6 @@ def processSubSeqAndGenerateData(obj, name):
 if __name__ == "__main__":
    pvd = processNonSequenceData("test.txt")
    print pvd.getNextBatch(100)
    pvd = processSeqAndGenerateData("_")
    print pvd.getNextBatch(100)
    pvd = processSubSeqAndGenerateData("_")
--- a/paddle/trainer/tests/test_CompareTwoOpts.cpp
+++ b/paddle/trainer/tests/test_CompareTwoOpts.cpp
@ -1,184 +0,0 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include <gtest/gtest.h>
 #include <paddle/utils/PythonUtil.h>
 #include <algorithm>
 #include <cstdlib>
 #include "paddle/trainer/Trainer.h"
 using namespace paddle;  // NOLINT
 using namespace std;     // NOLINT
 // Flags defined elsewhere. calcGradient() overwrites config, local,
 // use_gpu and nics before each training run; gpu_id is declared but not
 // referenced in this file.
 DECLARE_int32(gpu_id);
 DECLARE_bool(local);
 DECLARE_bool(use_gpu);
 DECLARE_string(config);
 DECLARE_string(nics);
 // The two trainer configs whose results are compared by TEST(Trainer, create).
 DEFINE_string(config_file_a, "", "config of one network to compare");
 DEFINE_string(config_file_b, "", "config of another network to compare");
 // When true and PADDLE_TYPE_DOUBLE is not defined, main() skips the test
 // and returns 0.
 DEFINE_bool(need_high_accuracy,
             true,
             "whether need to run in double accuracy (recommended)");
 // Relative tolerance used by checkBuffer(). The default 0.0 acts as "not
 // set": main() replaces it with 2e-4 (without PADDLE_TYPE_DOUBLE) or 2e-7
 // (with PADDLE_TYPE_DOUBLE).
 DEFINE_double(
    max_diff_ratio,
    0.0f,
    "max diff ratio allowed for outputs and parameters (value/gradient)");
 struct ComData {
  vector<Argument> outArgs;
  vector<ParameterPtr> parameters;
 };
 // Trains the network described by `configFile` and stores the gradient
 // machine's parameters in `data.parameters`.
 //
 // The run is configured through global flags (local mode, CPU only, empty
 // nics) and both random sources are seeded with 0 so that two calls can be
 // compared. `configFile` is taken by value.
 //
 // NOTE(review): `data.outArgs` is never assigned here, and nothing else in
 // this file fills it, so the output-comparison loop in compareGradient()
 // appears to iterate over empty vectors -- confirm whether that is intended.
 void calcGradient(ComData& data, const string configFile) {
   // The trainer config is created from FLAGS_config below, so the flags
   // must be set before createFromFlagConfig() is called.
   FLAGS_config = configFile;
   FLAGS_local = true;
   FLAGS_use_gpu = false;
   FLAGS_nics = "";
   // Fixed seeds for both the thread-local generator and rand().
   *ThreadLocalRand::getSeed() = 0;
   srand(0);
   Trainer trainer;
   // NOTE(review): meaning of the second argument (false) is not visible
   // from this file -- check Trainer::init.
   trainer.init(TrainerConfigHelper::createFromFlagConfig(), false);
   // Grab the parameter handles before training; they are read after
   // train() returns by compareGradient().
   data.parameters = trainer.getGradientMachine()->getParameters();
   trainer.getDataProvider()->setSkipShuffle();
   trainer.train();
 }
 /**
  * Compare two buffers element-wise and record a gtest failure if any pair
  * differs by more than FLAGS_max_diff_ratio (relative to the larger
  * magnitude of the two values).
  *
  * Equal values (including two infinities of the same sign) always match.
  * A pair involving NaN, or an infinite value against a different value,
  * is always reported as a mismatch: the relative difference of such a
  * pair is NaN, and the previous `diff > 0 && ratio > max` test silently
  * accepted it.
  *
  * @param A     first buffer, at least `len` elements
  * @param desA  label printed for values of A
  * @param B     second buffer, at least `len` elements
  * @param desB  label printed for values of B
  * @param len   number of elements to compare
  * @param width row width used only to print the row index of a mismatch
  */
 void checkBuffer(real* A,
                  const char* desA,
                  real* B,
                  const char* desB,
                  size_t len,
                  size_t width = 1) {
   int nNum = 0;
   for (size_t i = 0; i < len; ++i) {
     if (A[i] == B[i]) {
       continue;  // identical values never count as a difference
     }
     // A[i] != B[i] here, so for finite values the denominator is non-zero.
     real diff = fabs(A[i] - B[i]);
     real ratio = diff / std::max(fabs(A[i]), fabs(B[i]));
     // Negated comparison on purpose: a NaN ratio (NaN or infinite operand)
     // fails `ratio <= max` and is therefore counted as a mismatch.
     if (!(ratio <= FLAGS_max_diff_ratio)) {
       nNum++;
       LOG(INFO) << "Row: " << i / width << ", " << desA << " : " << A[i]
                 << "    " << desB << " : " << B[i];
     }
   }
   EXPECT_EQ(0, nNum);
   LOG(INFO) << "\n\n";
 }
 /**
  * Compare the outputs, parameter values and parameter gradients of two
  * training runs with checkBuffer().
  *
  * The two runs are matched by index, so the number of outputs, the number
  * of parameters and the size of every matched pair must agree. Each of
  * these is asserted before the buffers are compared; a mismatch records a
  * fatal gtest failure and returns from this function instead of reading
  * past the end of the shorter container/buffer.
  *
  * @param comDataA results of the first network
  * @param comDataB results of the second network
  */
 void compareGradient(ComData& comDataA, ComData& comDataB) {
   // Only read below, so bind by reference instead of copying the vectors.
   vector<Argument>& outArgsA = comDataA.outArgs;
   vector<Argument>& outArgsB = comDataB.outArgs;
   ASSERT_EQ(outArgsA.size(), outArgsB.size());

   for (size_t i = 0; i < outArgsA.size(); ++i) {
     CpuMatrix matA(outArgsA[i].value->getHeight(),
                    outArgsA[i].value->getWidth());
     CpuMatrix matB(outArgsB[i].value->getHeight(),
                    outArgsB[i].value->getWidth());
     // checkBuffer() walks B with A's element count.
     ASSERT_EQ(matA.getElementCnt(), matB.getElementCnt());

     matA.copyFrom(*outArgsA[i].value);
     matB.copyFrom(*outArgsB[i].value);

     LOG(INFO) << "\n--------------------------------"
               << " Check Network Output_" << i << ":"
               << " -------------------------------------\n";
     checkBuffer(matA.getData(),
                 "network A output",
                 matB.getData(),
                 "network B output",
                 matA.getElementCnt(),
                 matA.getWidth());
   }

   vector<ParameterPtr>& parametersA = comDataA.parameters;
   vector<ParameterPtr>& parametersB = comDataB.parameters;
   ASSERT_EQ(parametersA.size(), parametersB.size());

   LOG(INFO) << "\n\n--------------------------------"
             << " Check Gradient Machine Parameters:"
             << " -------------------------------------\n";
   for (size_t i = 0; i < parametersA.size(); ++i) {
     ParameterPtr parameterA = parametersA[i];
     ParameterPtr parameterB = parametersB[i];

     // Parameter values, copied into CPU vectors before comparison.
     CpuVector paraA(parameterA->getSize());
     CpuVector paraB(parameterB->getSize());
     ASSERT_EQ(paraA.getSize(), paraB.getSize());
     paraA.copyFrom(*parameterA->getBuf(PARAMETER_VALUE));
     paraB.copyFrom(*parameterB->getBuf(PARAMETER_VALUE));

     LOG(INFO) << "\n\n----------- PARAMETER_VALUE:  " << parameterA->getName()
               << " ; size : " << paraA.getSize() << " ------------";
     checkBuffer(paraA.getData(),
                 "Network A",
                 paraB.getData(),
                 "Network B",
                 paraA.getSize());

     // Parameter gradients.
     CpuVector gradA(*parameterA->getBuf(PARAMETER_GRADIENT));
     CpuVector gradB(*parameterB->getBuf(PARAMETER_GRADIENT));
     ASSERT_EQ(gradA.getSize(), gradB.getSize());

     LOG(INFO) << "\n\n----------- PARAMETER_GRADIENT: " << parameterA->getName()
               << " ; size : " << gradA.getSize() << " -----------";
     checkBuffer(gradA.getData(),
                 "Network A",
                 gradB.getData(),
                 "Network B",
                 gradA.getSize());
   }
 }
 // Trains the networks given by --config_file_a and --config_file_b one
 // after the other, then compares what both runs produced.
 TEST(Trainer, create) {
   ComData resultA;
   calcGradient(resultA, FLAGS_config_file_a);
   LOG(INFO) << "\n\ntraining of Network A is finished\n\n";

   ComData resultB;
   calcGradient(resultB, FLAGS_config_file_b);
   LOG(INFO) << "\n\ntraining of the Network B is finished\n\n";

   compareGradient(resultA, resultB);
 }
 // Test entry point: initializes paddle, gtest and Python, picks a default
 // tolerance when --max_diff_ratio was left at 0, then runs all tests.
 int main(int argc, char** argv) {
   paddle::initMain(argc, argv);
   testing::InitGoogleTest(&argc, argv);
   initPython(argc, argv);

 #ifndef PADDLE_TYPE_DOUBLE
   // Single-precision build: the comparison is skipped unless the caller
   // explicitly turned --need_high_accuracy off.
   if (FLAGS_need_high_accuracy) {
     LOG(INFO) << "skip test due to it's need high accuracy";
     return 0;
   }
   const double autoRatio = 2e-4;
   const char* const modeNote = " in low accuracy mode";
 #else
   const double autoRatio = 2e-7;
   const char* const modeNote = " in high accuracy mode";
 #endif

   // 0 means "not set on the command line": fall back to the build default.
   if (FLAGS_max_diff_ratio == 0.0f) {
     FLAGS_max_diff_ratio = autoRatio;
     LOG(INFO) << "auto set max_diff_ratio " << FLAGS_max_diff_ratio
               << modeNote;
   }

   return RUN_ALL_TESTS();
 }
--- a/Show More
+++ b/Show More
		`@ -1,2 +0,0 @@`
			`./test_ProtoDataProvider/data1.bin`
			`./test_ProtoDataProvider/data2.bin`
		`@ -1,2 +0,0 @@`
			`./test_ProtoDataProvider/data1.bin.gz`
			`./test_ProtoDataProvider/data2.bin.gz`
		`@ -1 +0,0 @@`
			`./trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data`