Merge branch 'develop' of github.com:baidu/Paddle into feature/minus_op

8 years ago · dee4c832cc
parent 481dd02fbb ab6b3c481a
commit dee4c832cc
19 changed files with 757 additions and 73 deletions
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@ -257,6 +257,11 @@ seq_concat
 ..  autoclass:: paddle.v2.layer.seq_concat
    :noindex:
 seq_slice
 ---------
 ..  autoclass:: paddle.v2.layer.seq_slice
    :noindex:
 kmax_sequence_score
 -------------------
 ..  autoclass:: paddle.v2.layer.kmax_sequence_score
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@ -15,6 +15,7 @@ if(Boost_FOUND)
  add_subdirectory(platform)
  add_subdirectory(framework)
  add_subdirectory(operators)
  add_subdirectory(pybind)
 endif()
 if(WITH_C_API)
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@ -39,24 +39,3 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
 cc_library(backward SRCS backward.cc DEPS net_op)
 cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
 if(WITH_PYTHON)
 cc_library(paddle_pybind SHARED
    SRCS pybind.cc
    DEPS pybind python backward
    sgd_op
    gather_op
    add_op
    mul_op
    rowwise_add_op
    sigmoid_op
    softmax_op
    mean_op
    cross_entropy_op
    recurrent_op
    uniform_random_op
    gaussian_random_op
    fill_zeros_like_op
    scale_op
    minus_op)
 endif(WITH_PYTHON)
--- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
+++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
@ -80,13 +80,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
      << "input of " << getName()
      << " must be a sequence or a nested sequence.";
  CHECK_EQ(input.value->getWidth(), 1UL)
-      << "input of " << getName()
+      << "input of " << getName() << " are scores over a sequence or "
-      << " is score over a sequence or a nested sequence, so its width "
+      << "a nested sequence, so its width must be 1.";
      << " must be 1.";
  if (useGpu_) {
-    // this Layer runs only in CPU, if the model is runing on GPU,
+    /*
-    // then copy the input to this layer from GPU to CPU.
+     * currently, this Layer only runs in CPU, if the other part of the model is
     * running on GPU, then copy the input to this layer from GPU to CPU.
     */
    Matrix::resizeOrCreate(scores_,
                           inputScore->getHeight(),
                           1,
@ -97,6 +98,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
    scores_ = inputScore;
  }
  /*
   * TODO(caoying)
   * In PaddlePaddle, currently all matrices are of real number types,
   * but the output of this layer, which is some selected indices of the given
   * sequence, is actually filled with int values. Storing int information
   * in a real number matrix is dangerous, since the real numbers will be
   * converted to int types.
   */
  Matrix::resizeOrCreate(
      output_.value,
      input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(),
--- a/paddle/gserver/layers/SequenceSliceLayer.cpp
+++ b/paddle/gserver/layers/SequenceSliceLayer.cpp
@ -0,0 +1,221 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "Layer.h"
 #include "paddle/math/Matrix.h"
 #include "paddle/math/Vector.h"
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
 namespace paddle {
 /*
  * SequenceSliceLayer, registered below under the type name "seq_slice".
  *
  * forward() gathers rows of the first input (selected through rowIndice_)
  * into the output with selectRows(); backward() pushes the output gradient
  * back through the same row indices with addToRows().
  *
  * Inputs: the sequence to slice, followed by one or two index matrices
  * (init() enforces that there are 2 or 3 inputs in total).
  */
 class SequenceSliceLayer : public Layer {
 public:
  explicit SequenceSliceLayer(const LayerConfig& config) : Layer(config) {}
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback = nullptr) override;
 private:
  /*
   * TODO(caoying)
   * In PaddlePaddle, currently all matrices are of real number types,
   * but the second and the (optional) third input, which are some
   * selected indices of the given sequence used to trim the sequence, are
   * actually filled with int values. Storing int information in real number
   * matrices is very dangerous, since the real numbers will be converted to
   * int types. If a user fills this matrix himself, invalid data may occur.
   */
  /* Start / end indices as read by calSelectedRows(); either one is set to
   * nullptr when only the other kind of index is supplied. */
  MatrixPtr startIdsOnCpu_;
  MatrixPtr endIdsOnCpu_;
  /* Row indices of the first input that make up the output, in output order. */
  std::vector<int> selectedRows_;
  /* selectedRows_ wrapped as an IVector for selectRows() / addToRows(). */
  IVectorPtr rowIndice_;
  /* Start positions of the first input, regrouped by
   * Argument::reorganizeSeqInfo() in forward(). */
  std::vector<std::vector<int>> inputSeqInfoVec_;
  /* Start positions accumulated for the output while slicing. */
  std::vector<int> outSubSeqStartPos_;
  std::vector<int> outSeqStartPos_;
  /* Validate the presence of sequence info and the shapes of the indices. */
  void checkInputs();
  /* Copy the index inputs into startIdsOnCpu_ / endIdsOnCpu_ (GPU path). */
  void copySliceIdsToCpu();
  /* Compute selectedRows_, rowIndice_ and the output sequence positions. */
  void calSelectedRows(const MatrixPtr starts, const MatrixPtr ends);
 };
 REGISTER_LAYER(seq_slice, SequenceSliceLayer);
 /*
  * Initialize the layer and validate the number of inputs.
  *
  * The layer needs the sequence to slice plus one or two index inputs, so
  * anything other than 2 or 3 inputs aborts via CHECK. Always returns true;
  * the return value of Layer::init() is not inspected here.
  */
 bool SequenceSliceLayer::init(const LayerMap& layerMap,
                              const ParameterMap& parameterMap) {
  /* Initialize the basic parent class */
  Layer::init(layerMap, parameterMap);
  CHECK_GE(inputLayers_.size(), 2U);
  CHECK_LE(inputLayers_.size(), 3U);
  setNeedSequenceInfo(false);
  return true;
 }
 /*
  * Sanity-check the inputs of the current batch (aborts via CHECK on failure):
  *   - input 0 must carry sequence information;
  *   - input 1 must have one row per sequence, or per sub-sequence when
  *     input 0 is a nested sequence;
  *   - when a third input is present, it must have the same height and width
  *     as input 1.
  */
 void SequenceSliceLayer::checkInputs() {
  const Argument& inputSeq = getInput(0);
  CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer "
                           << "must be a sequence.";
  const MatrixPtr indices1 = getInputValue(1);
  // One row of indices is expected for every (sub-)sequence in the batch.
  CHECK_EQ(static_cast<size_t>(indices1->getHeight()),
           inputSeq.hasSubseq() ? inputSeq.getNumSubSequences()
                                : inputSeq.getNumSequences())
      << "Height of the second input should be equal to number of sequence "
      << "in the first input.";
  if (inputLayers_.size() == 3) {
    // Both start and end indices are given: their shapes must agree.
    const MatrixPtr indices2 = getInputValue(2);
    CHECK_EQ(indices2->getHeight(), indices1->getHeight())
        << "start indices and end indices should have the same height.";
    CHECK_EQ(indices2->getWidth(), indices1->getWidth())
        << "start indices and end indices should have the same Width.";
  }
 }
 void SequenceSliceLayer::copySliceIdsToCpu() {
  const MatrixPtr indices1 = getInputValue(1);
  if (inputLayers_.size() == 2U) {
    if (config_.select_first()) {
      Matrix::resizeOrCreate(startIdsOnCpu_,
                             indices1->getHeight(),
                             indices1->getWidth(),
                             false /* trans */,
                             false /* useGpu */);
      startIdsOnCpu_->copyFrom(*indices1);
      endIdsOnCpu_ = nullptr;
    } else {
      Matrix::resizeOrCreate(endIdsOnCpu_,
                             indices1->getHeight(),
                             indices1->getWidth(),
                             false /* trans */,
                             false /* useGpu */);
      endIdsOnCpu_->copyFrom(*indices1);
      startIdsOnCpu_ = nullptr;
    }
  } else if (inputLayers_.size() == 3U) {
    Matrix::resizeOrCreate(startIdsOnCpu_,
                           indices1->getHeight(),
                           indices1->getWidth(),
                           false /* trans */,
                           false /* useGpu */);
    startIdsOnCpu_->copyFrom(*indices1);
    const MatrixPtr indices2 = getInputValue(2);
    Matrix::resizeOrCreate(endIdsOnCpu_,
                           indices2->getHeight(),
                           indices2->getWidth(),
                           false /* trans */,
                           false /* useGpu */);
    endIdsOnCpu_->copyFrom(*indices2);
  }
 }
 /*
  * Translate the start / end indices into the list of input rows to keep
  * (selectedRows_, rowIndice_) and build the sequence start positions of the
  * output.
  *
  * starts / ends hold one row per entry of inputSeqInfoVec_ (in iteration
  * order) and up to `width` slice indices per row; indices are relative to the
  * beginning of the corresponding sequence. A value of -1 terminates the
  * slices of a row. If `starts` is null a slice begins at the first row of
  * the sequence; if `ends` is null it runs to the last row of the sequence.
  * At least one of the two must be non-null.
  */
 void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
                                         const MatrixPtr ends) {
  CHECK(starts || ends) << "At least one of the start or end indices "
                        << "should be given.";
  outSeqStartPos_.resize(1, 0);
  outSubSeqStartPos_.resize(1, 0);
  selectedRows_.clear();

  // NOTE(review): the number of groups in inputSeqInfoVec_ is used to decide
  // whether sub-sequence positions are produced; confirm this is also the
  // intended behavior for a nested input holding a single outer sequence.
  const bool multipleGroups = inputSeqInfoVec_.size() > 1;
  const size_t beamSize = starts ? starts->getWidth() : ends->getWidth();
  size_t rowIdx = 0;
  for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) {
    for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) {
      for (size_t k = 0; k < beamSize; ++k) {
        // -1 marks the end of the valid indices in this row.
        if (starts && starts->getElement(rowIdx, k) == -1.) break;
        if (ends && ends->getElement(rowIdx, k) == -1.) break;

        // Convert each index to int before adding it to the row offset:
        // mixing int and real in one expression performs the addition in
        // floating point, which can lose precision for large offsets.
        const int seqBeg = inputSeqInfoVec_[i][j];
        int begPos = seqBeg;
        if (starts) begPos += static_cast<int>(starts->getElement(rowIdx, k));
        int endPos = inputSeqInfoVec_[i][j + 1] - 1;
        if (ends) {
          endPos = seqBeg + static_cast<int>(ends->getElement(rowIdx, k));
        }

        const int seqLen = endPos - begPos + 1;
        // Compare against a signed zero: with an unsigned literal a negative
        // length would be converted to a huge unsigned value and slip through.
        CHECK_GT(seqLen, 0);

        for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m);
        if (multipleGroups) {
          outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen);
        } else {
          outSeqStartPos_.push_back(outSeqStartPos_.back() + seqLen);
        }
      }
      rowIdx++;
    }
    if (multipleGroups) outSeqStartPos_.push_back(outSubSeqStartPos_.back());
  }

  if (useGpu_) {
    rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
    rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
  } else {
    rowIndice_ =
        IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
  }

  // create the sequence information for the output.
  ICpuGpuVector::resizeOrCreate(
      output_.sequenceStartPositions, outSeqStartPos_.size(), false);
  output_.sequenceStartPositions->copyFrom(
      outSeqStartPos_.data(), outSeqStartPos_.size(), false);
  if (multipleGroups) {
    ICpuGpuVector::resizeOrCreate(
        output_.subSequenceStartPositions, outSubSeqStartPos_.size(), false);
    output_.subSequenceStartPositions->copyFrom(
        outSubSeqStartPos_.data(), outSubSeqStartPos_.size(), false);
  }
 }
 /*
  * Forward pass: validate the inputs, work out which rows of the first input
  * are selected by the start / end indices, then gather those rows into the
  * output.
  */
 void SequenceSliceLayer::forward(PassType passType) {
  Layer::forward(passType);
  checkInputs();

  const Argument& inputSeq = getInput(0);
  inputSeqInfoVec_.clear();
  Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
                              inputSeq.subSequenceStartPositions,
                              inputSeqInfoVec_);

  if (useGpu_) {
    copySliceIdsToCpu();
  } else if (inputLayers_.size() == 2U) {
    // A single index input is either the start or the end indices,
    // depending on the select_first flag of the layer config.
    if (config_.select_first()) {
      startIdsOnCpu_ = getInputValue(1);
      endIdsOnCpu_ = nullptr;
    } else {
      startIdsOnCpu_ = nullptr;
      endIdsOnCpu_ = getInputValue(1);
    }
  } else if (inputLayers_.size() == 3U) {
    startIdsOnCpu_ = getInputValue(1);
    endIdsOnCpu_ = getInputValue(2);
  }

  // Compute the selected row indices of the batch and the sequence
  // information of the output.
  calSelectedRows(startIdsOnCpu_, endIdsOnCpu_);

  resetOutput(selectedRows_.size(), getSize());
  getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
 }
 void SequenceSliceLayer::backward(const UpdateCallback& callback) {
  getOutputGrad()->addToRows(*getInputGrad(0), *rowIndice_);
 }
 }  // namespace paddle
--- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp
+++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
@ -52,23 +52,34 @@ private:
   *   ]
   *
   * the output is saved to private member rowIndice_;
-   * [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+   * [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27]
   *  16,17,18,19,20,21,22,23,24,25,26,27]
   */
-  void calSelectedCols(const MatrixPtr selectedIndices,
+  void calSelectedRows(const MatrixPtr selectedIndices,
                       const std::vector<std::vector<int>>& inputSeqInfo);
-  // if the second input of this layer is on GPU memory, copy it to CPU memory.
+  /*
   * TODO(caoying)
   * In PaddlePaddle, currently all matrices are of real number types,
   * but the second input, which is some selected indices of the given
   * sequence used to trim the nested sequence, is actually filled with int
   * values. Storing int information in real number matrices is very
   * dangerous, since the real numbers will be converted to int types. If a
   * user fills this matrix himself, invalid data may occur.
   *
   * if the second input of this layer is on GPU memory, copy it to CPU memory.
   */
  MatrixPtr selIdsCpu_;
-  // reorganized sequenceStartPositions and subSequenceStartPositions
+  /*
-  // into a 2d vector to facilitate the sequence selection process.
+   * reorganize sequenceStartPositions and subSequenceStartPositions
   * into a 2d vector to facilitate the sequence selection process.
   */
  std::vector<std::vector<int>> inputSeqInfoVec_;
-  // the final selected row indices in a batch,
+  /* store the final selected row indices in a batch */
  // rowIdx_ and selectedRows_ actually share a same memory.
  IVectorPtr rowIndice_;
  /* rowIndice_ and selectedRows_ actually share a same memory. */
  std::vector<int> selectedRows_;
 };
@ -83,7 +94,7 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
  return true;
 }
-void SubNestedSequenceLayer::calSelectedCols(
+void SubNestedSequenceLayer::calSelectedRows(
    const MatrixPtr selectedIndices,
    const std::vector<std::vector<int>>& inputSeqInfo) {
  selectedRows_.clear();
@ -160,7 +171,7 @@ void SubNestedSequenceLayer::forward(PassType passType) {
  Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
                              inputSeq.subSequenceStartPositions,
                              inputSeqInfoVec_);
-  calSelectedCols(selIdsCpu_, inputSeqInfoVec_);
+  calSelectedRows(selIdsCpu_, inputSeqInfoVec_);
  resetOutput(selectedRows_.size(), getSize());
  getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
@ -34,6 +34,12 @@ add_unittest_without_exec(test_CRFLayerGrad
 add_test(NAME test_CRFLayerGrad
    COMMAND test_CRFLayerGrad)
 ################ test_SeqSliceLayerGrad ####################
 add_unittest_without_exec(test_SeqSliceLayerGrad
    test_SeqSliceLayerGrad.cpp
    LayerGradUtil.cpp)
 add_test(NAME test_SeqSliceLayerGrad
    COMMAND test_SeqSliceLayerGrad)
 add_unittest_without_exec(test_ActivationGrad
    test_ActivationGrad.cpp
--- a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
+++ b/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp
@ -0,0 +1,223 @@
 /* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include <gtest/gtest.h>
 #include "ModelConfig.pb.h"
 #include "paddle/gserver/layers/DataLayer.h"
 #include "paddle/trainer/Trainer.h"
 #include "LayerGradUtil.h"
 #include "paddle/testing/TestUtil.h"
 using namespace paddle;  // NOLINT
 using namespace std;     // NOLINT
 DECLARE_int32(gpu_id);
 DECLARE_bool(thread_local_rand_use_global_seed);
 const int MAX_SEQ_NUM = 17;
 const int MAX_SEQ_LEN = 23;
 const int MAX_BEAM_SIZE = 13;
 /*
  * Draw n values from {0, 1, ..., range - 1} and return them sorted in
  * ascending order. When range == n, all values are returned without
  * shuffling. Aborts via CHECK if range < n.
  */
 vector<real> randSampling(real range, int n) {
  CHECK_GE(range, n);
  vector<real> num(range);
  // Candidates 0, 1, 2, ...
  iota(begin(num), end(num), 0.);
  if (range == n) return num;
  // Shuffle, keep the first n candidates, then restore ascending order.
  random_shuffle(begin(num), end(num));
  num.resize(n);
  sort(begin(num), end(num));
  return num;
 }
 /*
  * Generate a random nested-sequence layout.
  *
  * On return, subSeqStartPos holds the start positions of all sub-sequences
  * and seqStartPos the start positions of the outer sequences, both beginning
  * with 0 and ending with the total number of rows. Each of the 1..MAX_SEQ_NUM
  * outer sequences holds 1..MAX_SEQ_NUM sub-sequences of length
  * 1..MAX_SEQ_LEN.
  *
  * The generator is deliberately not re-seeded here, so the seed set by the
  * caller stays in effect and the generated data is reproducible.
  */
 void genSeqInfo(vector<int>& seqStartPos, vector<int>& subSeqStartPos) {
  // assign() rather than resize(): discard any previous content entirely.
  seqStartPos.assign(1, 0);
  subSeqStartPos.assign(1, 0);
  const int seqNum = 1 + (rand() % MAX_SEQ_NUM);
  for (int i = 0; i < seqNum; ++i) {
    const int subSeqNum = 1 + (rand() % MAX_SEQ_NUM);
    for (int j = 0; j < subSeqNum; ++j) {
      subSeqStartPos.push_back(subSeqStartPos.back() +
                               (1 + (rand() % MAX_SEQ_LEN)));
    }
    // An outer sequence ends where its last sub-sequence ends.
    seqStartPos.push_back(subSeqStartPos.back());
  }
 }
 /*
  Generate start indices for every sequence described by seqStartPos.
  Each row of `starts` has beamSize entries: the sampled start indices
  (relative to the sequence, ascending) followed by -1 padding.
 */
 void genStarts(vector<int>& seqStartPos,
               vector<vector<real>>& starts,
               size_t beamSize) {
  const size_t seqCount = seqStartPos.size() - 1;
  starts.assign(seqCount, vector<real>(beamSize, -1.));
  for (size_t seqIdx = 0; seqIdx < seqCount; ++seqIdx) {
    const int seqLen = seqStartPos[seqIdx + 1] - seqStartPos[seqIdx];
    // A sequence cannot provide more distinct start indices than it has rows.
    const int sampleNum = min(seqLen, static_cast<int>(beamSize));
    const vector<real> sampled = randSampling(seqLen, sampleNum);
    copy(sampled.begin(), sampled.end(), starts[seqIdx].begin());
  }
 }
 /*
  Generate end indices matching the given start indices.
  For every valid start index, an end index in [start, seqLen - 1] is
  produced; entries after the first -1 start index of a row stay -1.
 */
 void genEnds(vector<int>& seqStartPos,
             vector<vector<real>>& starts,
             vector<vector<real>>& ends,
             size_t beamSize) {
  CHECK_EQ(seqStartPos.size() - 1, starts.size());
  ends.assign(seqStartPos.size() - 1, vector<real>(beamSize, -1.));
  for (size_t i = 0; i < starts.size(); ++i) {
    const int seqLen = seqStartPos[i + 1] - seqStartPos[i];
    for (size_t j = 0; j < starts[i].size(); ++j) {
      CHECK_GE(seqLen - 1, starts[i][j]);
      const real first = starts[i][j];
      if (first == -1.) break;
      // A slice starting at the last row can only end there; otherwise
      // extend the slice by a random number of rows.
      ends[i][j] = (first == (seqLen - 1))
                       ? first
                       : first + randSampling(seqLen - first, 1)[0];
    }
  }
 }
 /*
  Generate a complete random test case: the sequence layout plus start and
  end indices. The indices refer to sub-sequences when hasSubseq is true and
  to the outer sequences otherwise.
 */
 void genTestData(vector<int>& seqStartPos,
                 vector<int>& subSeqStartPos,
                 vector<vector<real>>& starts,
                 vector<vector<real>>& ends,
                 bool hasSubseq) {
  const size_t beamSize = 1 + (rand() % MAX_BEAM_SIZE);
  genSeqInfo(seqStartPos, subSeqStartPos);

  // Positions the slice indices are generated against.
  vector<int>& slicedStartPos = hasSubseq ? subSeqStartPos : seqStartPos;
  genStarts(slicedStartPos, starts, beamSize);
  genEnds(slicedStartPos, starts, ends, beamSize);
 }
 /*
  Append all elements of the rows of inVec, row by row, to outVec.
  Elements are moved out of inVec; the rows themselves keep their sizes.
 */
 template <typename T>
 void flatten2dVector(vector<vector<T>>& inVec, vector<T>& outVec) {
  size_t flatSize = 0;
  for (size_t r = 0; r < inVec.size(); ++r) flatSize += inVec[r].size();
  outVec.reserve(flatSize);
  for (size_t r = 0; r < inVec.size(); ++r) {
    vector<T>& row = inVec[r];
    outVec.insert(outVec.end(),
                  make_move_iterator(row.begin()),
                  make_move_iterator(row.end()));
  }
 }
 /*
  Build a "seq_slice" layer configuration from the given sequence layout and
  indices, and run the gradient check on it.

  The first input is a randomly filled (nested) sequence. `starts` and `ends`
  are added as further inputs only when non-empty, so passing an empty vector
  for one of them tests the start-only / end-only variants.
 */
 void testSeqSliceLayer(bool hasSubseq,
                       bool useGpu,
                       vector<int>& seqStartPos,
                       vector<int>& subSeqStartPos,
                       vector<vector<real>>& starts,
                       vector<vector<real>>& ends) {
  // layer size is not crucial for this layer,
  // so here use a small layer size in the unittest.
  const size_t layerSize{4};
  TestConfig config;
  config.layerConfig.set_type("seq_slice");
  config.layerConfig.set_size(layerSize);
  // add the first input
  MatrixPtr seqInputPtr =
      Matrix::create(hasSubseq ? subSeqStartPos.back() : seqStartPos.back(),
                     layerSize,
                     false,
                     false);
  seqInputPtr->randomizeUniform();
  if (hasSubseq) {
    config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
                                "seq_input",
                                seqInputPtr,
                                seqStartPos,
                                subSeqStartPos});
  } else {
    config.inputDefs.push_back(
        {INPUT_SELF_DEFINE_DATA, "seq_input", seqInputPtr, seqStartPos});
  }
  config.layerConfig.add_inputs();
  // add start indices
  if (starts.size()) {
    vector<real> startsToVec;
    flatten2dVector(starts, startsToVec);
    MatrixPtr startMatrixPtr =
        Matrix::create(starts.size(), starts[0].size(), false, false);
    startMatrixPtr->copyFrom(startsToVec.data(), startsToVec.size());
    config.inputDefs.push_back(
        {INPUT_SELF_DEFINE_DATA, "starts", startMatrixPtr});
    config.layerConfig.add_inputs();
    config.layerConfig.set_select_first(true);
  }
  // add end indices
  if (ends.size()) {
    vector<real> endsToVec;
    flatten2dVector(ends, endsToVec);
    MatrixPtr endMatrixPtr =
        Matrix::create(ends.size(), ends[0].size(), false, false);
    endMatrixPtr->copyFrom(endsToVec.data(), endsToVec.size());
    config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "ends", endMatrixPtr});
    config.layerConfig.add_inputs();
    // When both kinds of indices are given, this overrides the value set
    // above, so select_first ends up false.
    config.layerConfig.set_select_first(false);
  }
  testLayerGrad(config, "seq_slice", /*batchSize*/ 100, false, useGpu, false);
 }
 /*
  Gradient check of the seq_slice layer for nested and non-nested inputs,
  with end indices only, start indices only, and both.
 */
 TEST(Layer, SeqSliceLayer) {
  vector<int> seqStartPos;
  vector<int> subSeqStartPos;
  vector<vector<real>> starts;
  vector<vector<real>> ends;
  // The CPU run is always included; the GPU run is added unless
  // PADDLE_ONLY_CPU is defined.
  std::vector<bool> mode = {false};
 #ifndef PADDLE_ONLY_CPU
  mode.push_back(true);
 #endif
  for (bool hasSubseq : {true, false}) {
    LOG(INFO) << "hasSubSeq : " << hasSubseq;
    // genTestData() regenerates the sequence layout itself, so no separate
    // genSeqInfo() call is needed beforehand.
    genTestData(seqStartPos, subSeqStartPos, starts, ends, hasSubseq);
    for (bool useGpu : mode) {
      // An empty index set tells testSeqSliceLayer() to omit that input.
      vector<vector<real>> tmp;
      testSeqSliceLayer(
          hasSubseq, useGpu, seqStartPos, subSeqStartPos, tmp, ends);
      testSeqSliceLayer(
          hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, tmp);
      testSeqSliceLayer(
          hasSubseq, useGpu, seqStartPos, subSeqStartPos, starts, ends);
    }
  }
 }
 /*
  Test entry point: initialize Paddle and the device given by --gpu_id,
  fix the random seed, then run all registered gtest cases.
 */
 int main(int argc, char** argv) {
  initMain(argc, argv);
  hl_start();
  hl_init(FLAGS_gpu_id);
  FLAGS_thread_local_rand_use_global_seed = true;
  // Fixed seed for the rand()-based test data generators.
  srand(1);
  testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
 }
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@ -676,19 +676,28 @@ void Argument::reorganizeSeqInfo(
    const ICpuGpuVectorPtr seqStartPos,
    const ICpuGpuVectorPtr subSeqStartPos,
    std::vector<std::vector<int>>& reorganizedSeqInfo) {
-  int* seqStarts = seqStartPos->getMutableData(false);
+  CHECK(seqStartPos);
  int* subSeqStarts = subSeqStartPos->getMutableData(false);
  int seqNum = seqStartPos->getSize() - 1;
-  reorganizedSeqInfo.resize(seqNum, std::vector<int>());
+  int* seqStarts = seqStartPos->getMutableData(false);
-  int seqIdx = 0;
+
-  for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
+  if (subSeqStartPos) {
-    reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
+    int* subSeqStarts = subSeqStartPos->getMutableData(false);
-    if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
+    reorganizedSeqInfo.resize(seqNum, std::vector<int>());
-      seqIdx++;
+    int seqIdx = 0;
-      if (seqIdx == seqNum) return;
+    for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
      reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
      if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
        seqIdx++;
        if (seqIdx == seqNum) return;
        reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
      }
    }
  } else {
    reorganizedSeqInfo.resize(1, std::vector<int>(seqNum + 1, 0));
    memcpy(reorganizedSeqInfo[0].data(),
           seqStarts,
           sizeof(int) * seqStartPos->getSize());
  }
 }
--- a/paddle/pybind/CMakeLists.txt
+++ b/paddle/pybind/CMakeLists.txt
@ -0,0 +1,20 @@
 # Build the shared library paddle_pybind from pybind.cc. It is only built
 # when WITH_PYTHON is enabled. Every operator library listed in DEPS is
 # declared as a dependency of the target.
 if(WITH_PYTHON)
 cc_library(paddle_pybind SHARED
    SRCS pybind.cc
    DEPS pybind python backward
    sgd_op
    gather_op
    add_op
    mul_op
    rowwise_add_op
    sigmoid_op
    softmax_op
    mean_op
    cross_entropy_op
    recurrent_op
    uniform_random_op
    gaussian_random_op
    fill_zeros_like_op
    scale_op
    minus_op)
 endif(WITH_PYTHON)
--- a/paddle/framework/pybind.cc
+++ b/paddle/framework/pybind.cc
@ -18,11 +18,11 @@ limitations under the License. */
 #include "paddle/framework/backward.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/tensor_py.h"
 #include "paddle/operators/net_op.h"
 #include "paddle/operators/recurrent_op.h"
 #include "paddle/platform/enforce.h"
 #include "paddle/platform/place.h"
 #include "paddle/pybind/tensor_py.h"
 #include "paddle/string/to_string.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
@ -135,7 +135,8 @@ All parameter, weight, gradient are variables in Paddle.
           py::return_value_policy::reference)
      .def("find_var", &Scope::FindVar, py::return_value_policy::reference)
      .def(py::init<>())
-      .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); },
+      .def("new_scope",
           [](Scope &self) -> Scope * { return &self.NewScope(); },
           py::return_value_policy::reference)
      .def("drop_kids", &Scope::DropKids);
@ -223,8 +224,10 @@ All parameter, weight, gradient are variables in Paddle.
                    retv->SetType("plain_net");
                    return retv;
                  })
-      .def("append_op", [](operators::NetOp &self,
+      .def("append_op",
-                           const OperatorBase &op) { self.AppendOp(op); })
+           [](operators::NetOp &self, const OperatorBase &op) {
             self.AppendOp(op);
           })
      .def("complete_add_op", &operators::NetOp::CompleteAddOp)
      .def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) {
        self->CompleteAddOp();
@ -244,10 +247,9 @@ All parameter, weight, gradient are variables in Paddle.
            auto rnn_op = OpRegistry::CreateOp(desc);
            return static_cast<operators::RecurrentOp *>(rnn_op.release());
          })
-      .def("set_stepnet", [](operators::RecurrentOp &self,
+      .def("set_stepnet",
-                             const operators::NetOp &net) -> void {
+           [](operators::RecurrentOp &self, const operators::NetOp &net)
-        self.set_stepnet(net.Clone());
+               -> void { self.set_stepnet(net.Clone()); });
      });
  m.def("unique_integer", UniqueIntegerGenerator);
--- a/paddle/framework/tensor_py.h
+++ b/paddle/framework/tensor_py.h
@ -63,8 +63,11 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
      }
      return py::buffer_info(
          dst_tensor.mutable_data<CUR_TYPE>(dst_tensor.holder_->place()),
-          sizeof(CUR_TYPE), py::format_descriptor<CUR_TYPE>::format(),
+          sizeof(CUR_TYPE),
-          (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides);
+          py::format_descriptor<CUR_TYPE>::format(),
          (size_t)framework::arity(dst_tensor.dims()),
          dims_outside,
          strides);
    } else {
      constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
      return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
@ -107,8 +110,8 @@ void PyCUDATensorSetFromArray(
  self.Resize(framework::make_ddim(dims));
  auto *dst = self.mutable_data<T>(place);
-  paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(),
+  paddle::platform::GpuMemcpySync(
-                                  cudaMemcpyHostToDevice);
+      dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice);
 }
 #endif
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@ -2694,6 +2694,49 @@ class SubSequenceLayer(LayerBase):
        self.create_bias_parameter(bias, size)
@config_layer('seq_slice')
 class SeqSliceLayer(LayerBase):
    """
    Config-side definition of the 'seq_slice' layer.

    The layer inputs are the sequence to slice, followed by the start
    indices and/or the end indices (at least one of the two must be given).
    The layer size is taken from the first input. When only one kind of
    indices is given, ``select_first`` records whether it is the start
    indices.
    """

    def __init__(self, name, inputs, starts, ends, bias=False, **xargs):
        if isinstance(inputs, list):
            assert len(inputs) == 1, ('the first input of sequence slice layer '
                                      'is a single sequence input.')
            # Work on a copy so that the index inputs appended below do not
            # leak into the caller's list.
            inputs = list(inputs)
        else:
            inputs = [inputs]
        if starts is not None:
            if isinstance(starts, list):
                assert len(starts) == 1, (
                    'the start indices for sequence slice layer cannot '
                    'be a list having more than one element.')
                starts = starts[0]
            inputs.append(starts)
        if ends is not None:
            if isinstance(ends, list):
                assert len(ends) == 1, (
                    'the end indices for sequence slice layer cannot '
                    'be a list having more than one element.')
                ends = ends[0]
            inputs.append(ends)
        assert len(inputs) >= 2, (
            'the sequence slice layer has at least two inputs.')
        super(SeqSliceLayer, self).__init__(
            name, 'seq_slice', 0, inputs=inputs, **xargs)
        input_layer0 = self.get_input_layer(0)
        size = input_layer0.size
        self.set_layer_size(size)
        if len(inputs) == 3:
            assert (
                self.get_input_layer(1).size == self.get_input_layer(2).size), (
                    'If start and end indices are both given to '
                    'sequence slice layer, they should have the same width.')
        elif len(inputs) == 2:
            # Only one kind of indices: remember whether it is the starts.
            self.config.select_first = (starts is not None)
@config_layer('sub_nested_seq')
 class SubNestedSequenceLayer(LayerBase):
    def __init__(self, name, inputs, selected_indices, bias=False, **xargs):
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@ -134,6 +134,7 @@ __all__ = [
    'sub_nested_seq_layer',
    'clip_layer',
    'slice_projection',
    'seq_slice_layer',
    'kmax_sequence_score_layer',
    'scale_shift_layer',
 ]
@ -231,6 +232,7 @@ class LayerType(object):
    CROP_LAYER = 'crop'
    SUB_NESTED_SEQ = 'sub_nested_seq'
    CLIP_LAYER = 'clip'
    SEQ_SLICE = 'seq_slice'
    KMAX_SEQ_SCORE = 'kmax_seq_score'
    SCALE_SHIFT_LAYER = 'scale_shift'
@ -6193,6 +6195,72 @@ def clip_layer(input, min, max, name=None):
        name, LayerType.CLIP_LAYER, parents=[input], size=input.size)
@wrap_name_default()
 def seq_slice_layer(input, starts, ends, name=None):
    """
    seq_slice_layer will return one or several sub-sequences from the
    input sequence layer given start and end indices.

        - If only start indices are given, and end indices are set to None,
          this layer slices the input sequence from the given start indices
          to its end.
        - If only end indices are given, and start indices are set to None,
          this layer slices the input sequence from its beginning to the
          given end indices.
        - If start and end indices are both given, they should have the same
          number of elements.

    If the start or end indices contain more than one element, the input
    sequence will be sliced multiple times.

    .. code-block:: python

        seq_slice = seq_slice_layer(input=input_seq,
                                    starts=start_pos, ends=end_pos)

    :param name: name of this layer.
    :type name: basestring
    :param input: input for this layer, it should be a sequence.
    :type input: LayerOutput
    :param starts: start indices to slice the input sequence.
    :type starts: LayerOutput|None
    :param ends: end indices to slice the input sequence.
    :type ends: LayerOutput|None
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    assert isinstance(input, LayerOutput), (
        'The first input of seq_slice layer must be a PaddlePaddle layer.')
    if starts is not None:
        assert isinstance(starts, LayerOutput), (
            'The start indices for seq_slice layer '
            'must be a PaddlePaddle layer.')
    if ends is not None:
        assert isinstance(ends, LayerOutput), (
            'The end indices for seq_slice layer must be a PaddlePaddle layer.')
    assert starts is not None or ends is not None, (
        'start and end indices '
        'cannot be set to None at the same time, at least one of '
        'them should be given.')
    if starts is not None and ends is not None:
        assert starts.size == ends.size, (
            'If start and end indices are both given to seq_slice_layer, '
            'they should have the same width.')
    # Only the names of the layers are passed to the config parser.
    Layer(
        name=name,
        type=LayerType.SEQ_SLICE,
        inputs=input.name,
        starts=starts.name if starts is not None else None,
        ends=ends.name if ends is not None else None)
    return LayerOutput(
        name, LayerType.SEQ_SLICE, parents=[input], size=input.size)
@wrap_name_default()
@layer_support()
 def kmax_sequence_score_layer(input, name=None, beam_size=1):
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@ -8,6 +8,7 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
 test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
 test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
 test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
-test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer)
+test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer
 test_seq_slice_layer)
 export whole_configs=(test_split_datasource)
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr
@ -1,12 +1,6 @@
 type: "nn"
 layers {
-  name: "input"
+  name: "input_seq"
  type: "data"
  size: 300
  active_type: ""
 }
 layers {
  name: "data"
  type: "data"
  size: 128
  active_type: ""
@ -17,7 +11,7 @@ layers {
  size: 1
  active_type: "exponential"
  inputs {
-    input_layer_name: "data"
+    input_layer_name: "input_seq"
    input_parameter_name: "___fc_layer_0__.w0"
  }
  bias_parameter_name: "___fc_layer_0__.wbias"
@ -51,15 +45,14 @@ parameters {
  initial_strategy: 0
  initial_smart: false
 }
-input_layer_names: "data"
+input_layer_names: "input_seq"
 output_layer_names: "__kmax_sequence_score_layer_0__"
 sub_models {
  name: "root"
-  layer_names: "input"
+  layer_names: "input_seq"
  layer_names: "data"
  layer_names: "__fc_layer_0__"
  layer_names: "__kmax_sequence_score_layer_0__"
-  input_layer_names: "data"
+  input_layer_names: "input_seq"
  output_layer_names: "__kmax_sequence_score_layer_0__"
  is_recurrent_layer_group: false
 }
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_slice_layer.protostr
@ -0,0 +1,79 @@
 type: "nn"
 layers {
  name: "word"
  type: "data"
  size: 128
  active_type: ""
 }
 layers {
  name: "starts"
  type: "data"
  size: 5
  active_type: ""
 }
 layers {
  name: "ends"
  type: "data"
  size: 5
  active_type: ""
 }
 layers {
  name: "__seq_slice_layer_0__"
  type: "seq_slice"
  size: 128
  active_type: ""
  inputs {
    input_layer_name: "word"
  }
  inputs {
    input_layer_name: "starts"
  }
  inputs {
    input_layer_name: "ends"
  }
 }
 layers {
  name: "__seq_slice_layer_1__"
  type: "seq_slice"
  size: 128
  active_type: ""
  inputs {
    input_layer_name: "word"
  }
  inputs {
    input_layer_name: "starts"
  }
  select_first: true
 }
 layers {
  name: "__seq_slice_layer_2__"
  type: "seq_slice"
  size: 128
  active_type: ""
  inputs {
    input_layer_name: "word"
  }
  inputs {
    input_layer_name: "ends"
  }
  select_first: false
 }
 input_layer_names: "word"
 output_layer_names: "__seq_slice_layer_0__"
 output_layer_names: "__seq_slice_layer_1__"
 output_layer_names: "__seq_slice_layer_2__"
 sub_models {
  name: "root"
  layer_names: "word"
  layer_names: "starts"
  layer_names: "ends"
  layer_names: "__seq_slice_layer_0__"
  layer_names: "__seq_slice_layer_1__"
  layer_names: "__seq_slice_layer_2__"
  input_layer_names: "word"
  output_layer_names: "__seq_slice_layer_0__"
  output_layer_names: "__seq_slice_layer_1__"
  output_layer_names: "__seq_slice_layer_2__"
  is_recurrent_layer_group: false
 }
--- a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py
@ -2,9 +2,7 @@
 #coding=utf-8
 from paddle.trainer_config_helpers import *
-data = data_layer(name='input', size=300)
+data = data_layer(name="input_seq", size=128)
 data = data_layer(name="data", size=128)
 scores = fc_layer(input=data, size=1, act=ExpActivation())
 kmax_seq_id = kmax_sequence_score_layer(input=scores, beam_size=5)
--- a/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py
@ -0,0 +1,13 @@
 #!/usr/bin/env python
 #coding=utf-8
 from paddle.trainer_config_helpers import *
 # Three data layers: the sequence to be sliced, plus the start and end
 # indices (both index layers are declared with the same size).
 input_seq = data_layer("word", size=128)
 starts = data_layer("starts", size=5)
 ends = data_layer("ends", size=5)
 # Build one seq_slice layer per argument combination: both indices given,
 # starts only, and ends only.
 seq_slice1 = seq_slice_layer(input=input_seq, starts=starts, ends=ends)
 seq_slice2 = seq_slice_layer(input=input_seq, starts=starts, ends=None)
 seq_slice3 = seq_slice_layer(input=input_seq, starts=None, ends=ends)
 # Register all three slice layers as outputs of this config.
 outputs(seq_slice1, seq_slice2, seq_slice3)