follow comments.

8 years ago · 82e4fab4e3
parent b7359ee3ce
commit 82e4fab4e3
4 changed files with 58 additions and 65 deletions
--- a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
+++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp
@@ -80,13 +80,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
      << "input of " << getName()
      << " must be a sequence or a nested sequence.";
  CHECK_EQ(input.value->getWidth(), 1UL)
-      << "input of " << getName()
-      << " is score over a sequence or a nested sequence, so its width "
-      << " must be 1.";
+      << "input of " << getName() << " are scores over a sequence or "
+      << "a nested sequence, so its width must be 1.";

  if (useGpu_) {
-    // this Layer runs only in CPU, if the model is runing on GPU,
-    // then copy the input to this layer from GPU to CPU.
+    /*
+     * Currently, this layer only runs on CPU. If the rest of the model is
+     * running on GPU, copy the input of this layer from GPU to CPU.
+     */
    Matrix::resizeOrCreate(scores_,
                           inputScore->getHeight(),
                           1,
@@ -97,13 +98,14 @@ void KmaxSeqScoreLayer::forward(PassType passType) {
    scores_ = inputScore;
  }

-  // TODO(caoying)
-  // In PaddlePaddle, the currently available matrixes all a have real-typed
-  // data field, but the selected indices information are actually int-typed
-  // (with -1 as a special token). Storing indices information in real-typed
-  // Matrix leads to converting real to int. This is very dangerous if a user
-  // fills this matrix himself, invalid data may occur.
-  // The selected indices should be stored in an int-typed matrix.
+  /*
+   * TODO(caoying)
+   * In PaddlePaddle, all matrices currently store real-typed data, but the
+   * output of this layer (the selected indices of the given sequence) is
+   * actually int-typed. Storing int-typed information in a real-typed
+   * matrix is dangerous, since the real numbers have to be converted back
+   * to int types.
+   */
  Matrix::resizeOrCreate(
      output_.value,
      input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(),
--- a/paddle/gserver/layers/SequenceSliceLayer.cpp
+++ b/paddle/gserver/layers/SequenceSliceLayer.cpp
@@ -31,13 +31,15 @@ public:
  void backward(const UpdateCallback& callback = nullptr) override;

 private:
-  // TODO(caoying)
-  // In PaddlePaddle, the currently available matrixes all a have real-typed
-  // data field, but the selected indices information are actually int-typed
-  // (with -1 as a special token). Storing indices information in real-typed
-  // Matrix leads to converting real to int. This is very dangerous if a user
-  // fills this matrix himself, invalid data may occur.
-  // The selected indices should be stored in an int-typed matrix.
+  /*
+   * TODO(caoying)
+   * In PaddlePaddle, all matrices currently store real-typed data, but the
+   * second and the (optional) third inputs, which are the selected indices
+   * of the given sequence used to trim it, are actually int-typed. Storing
+   * int-typed information in real-typed matrices is very dangerous, since
+   * the real numbers have to be converted to int types. If a user fills
+   * this matrix himself, invalid data may occur.
+   */

  MatrixPtr startIdsOnCpu_;
  MatrixPtr endIdsOnCpu_;
@@ -68,7 +70,7 @@ bool SequenceSliceLayer::init(const LayerMap& layerMap,

 void SequenceSliceLayer::checkInputs() {
  const Argument& inputSeq = getInput(0);
-  CHECK(inputSeq.hasSeq()) << "The first input of sequence slic layer "
+  CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer "
                           << "must be a sequence.";
  const MatrixPtr indices1 = getInputValue(1);
  CHECK_EQ(static_cast<size_t>(indices1->getHeight()),
@@ -86,22 +88,6 @@ void SequenceSliceLayer::checkInputs() {
 }

 void SequenceSliceLayer::copySliceIdsToCpu() {
-  if (!useGpu_) {
-    if (inputLayers_.size() == 2U) {
-      if (config_.select_first()) {
-        startIdsOnCpu_ = getInputValue(1);
-        endIdsOnCpu_ = nullptr;
-      } else {
-        startIdsOnCpu_ = nullptr;
-        endIdsOnCpu_ = getInputValue(1);
-      }
-    } else if (inputLayers_.size() == 3U) {
-      startIdsOnCpu_ = getInputValue(1);
-      endIdsOnCpu_ = getInputValue(2);
-    }
-    return;
-  }
-
  const MatrixPtr indices1 = getInputValue(1);
  if (inputLayers_.size() == 2U) {
    if (config_.select_first()) {
@@ -141,22 +127,19 @@ void SequenceSliceLayer::copySliceIdsToCpu() {

 void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
                                         const MatrixPtr ends) {
+  CHECK(starts || ends) << "start and end indices cannot both be absent.";
+
  outSeqStartPos_.resize(1, 0);
  outSubSeqStartPos_.resize(1, 0);
  selectedRows_.clear();

  size_t beamSize = starts ? starts->getWidth() : ends->getWidth();
-  // iterate over sequence
  size_t rowIdx = 0;
  for (size_t i = 0; i < inputSeqInfoVec_.size(); ++i) {
-    // iterate over sub-sequence in a sequence
    for (size_t j = 0; j < inputSeqInfoVec_[i].size() - 1; ++j) {
-      // iterate over each index for slicing.
      for (size_t k = 0; k < beamSize; ++k) {
-        if (starts) {
-          if (starts->getElement(rowIdx, k) == -1.) break;
-        } else if (ends->getElement(rowIdx, k) == -1.)
-          break;
+        if (starts && starts->getElement(rowIdx, k) == -1.) break;
+        if (ends && ends->getElement(rowIdx, k) == -1.) break;

        int begPos = inputSeqInfoVec_[i][j];
        if (starts) begPos += starts->getElement(rowIdx, k);
@@ -165,7 +148,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts,
        if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k);

        int seqLen = endPos - begPos + 1;
-        CHECK(seqLen);
+        CHECK_GT(seqLen, 0);  // a slice must be non-empty (endPos >= begPos)
        for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m);
        inputSeqInfoVec_.size() > 1
            ? outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen)
@@ -208,7 +191,16 @@ void SequenceSliceLayer::forward(PassType passType) {
  Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
                              inputSeq.subSequenceStartPositions,
                              inputSeqInfoVec_);
-  copySliceIdsToCpu();
+  if (!useGpu_) {
+    if (inputLayers_.size() == 2U) {
+      startIdsOnCpu_ = config_.select_first() ? getInputValue(1) : nullptr;
+      endIdsOnCpu_ = config_.select_first() ? nullptr : getInputValue(1);
+    } else if (inputLayers_.size() == 3U) {
+      startIdsOnCpu_ = getInputValue(1);
+      endIdsOnCpu_ = getInputValue(2);
+    }
+  } else
+    copySliceIdsToCpu();

  // calculate the selected row indices in a batch,
  // and build the output sequence information.
@@ -221,10 +213,7 @@ void SequenceSliceLayer::forward(PassType passType) {
 }

 void SequenceSliceLayer::backward(const UpdateCallback& callback) {
-  MatrixPtr inputSeqGrad = getInputGrad(0);
-  MatrixPtr outputGrad = getOutputGrad();
-
-  outputGrad->addToRows(*inputSeqGrad, *rowIndice_);
+  getOutputGrad()->addToRows(*getInputGrad(0), *rowIndice_);
 }

 }  // namespace paddle
--- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp
+++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
@@ -58,23 +58,28 @@ private:
  void calSelectedRows(const MatrixPtr selectedIndices,
                       const std::vector<std::vector<int>>& inputSeqInfo);

-  // if the second input of this layer is on GPU memory, copy it to CPU memory.
-  // TODO(caoying)
-  // In PaddlePaddle, the currently available matrixes all a have real-typed
-  // data field, but the selected indices information are actually int-typed
-  // (with -1 as a special token). Storing indices information in real-typed
-  // Matrix leads to converting real to int. This is very dangerous if a user
-  // fills this matrix himself, invalid data may occur.
-  // The selected indices should be stored in an int-typed matrix.
+  /*
+   * TODO(caoying)
+   * In PaddlePaddle, all matrices currently store real-typed data, but the
+   * second input, which holds the selected indices of the given sequence
+   * used to trim the nested sequence, is actually int-typed. Storing
+   * int-typed information in real-typed matrices is very dangerous, since
+   * the real numbers have to be converted to int types. If a user fills
+   * this matrix himself, invalid data may occur.
+   *
+   * If the second input of this layer is on GPU memory, copy it to CPU memory.
+   */
  MatrixPtr selIdsCpu_;

-  // reorganized sequenceStartPositions and subSequenceStartPositions
-  // into a 2d vector to facilitate the sequence selection process.
+  /*
+   * reorganize sequenceStartPositions and subSequenceStartPositions
+   * into a 2d vector to facilitate the sequence selection process.
+   */
  std::vector<std::vector<int>> inputSeqInfoVec_;

-  // the final selected row indices in a batch,
-  // rowIndice_ and selectedRows_ actually share a same memory.
+  /* store the final selected row indices in a batch */
  IVectorPtr rowIndice_;
+  /* rowIndice_ and selectedRows_ actually share the same memory. */
  std::vector<int> selectedRows_;
 };

--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2717,10 +2717,7 @@ class SeqSliceLayer(LayerBase):
                    'If start and end indices are both given to'
                    'sequence slice layer, they should have the same width.')
        elif len(inputs) == 2:
-            if starts is not None:
-                self.config.select_first = True
-            else:
-                self.config.select_first = False
+            self.config.select_first = (starts is not None)


@config_layer('sub_nested_seq')