parent 16fd9f18e3
commit 624e3e5208
@@ -0,0 +1,94 @@
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/math/MathFunctions.h"
#include "paddle/math/Matrix.h"

namespace paddle {

/// Wraps MKL's packed GEMM API. The weight matrix is converted into MKL's
/// internal packed layout once, in the constructor, so that repeated GEMM
/// calls against the same weight skip that conversion. Note that the
/// cblas_sgemm_* routines are single precision, so this requires a build
/// where real is float.
class MKLPackedGemm {
protected:
  /// The weight packed in MKL's internal format.
  real* weightPacked_;
  /// The transposed weight packed in MKL's internal format.
  real* weightTPacked_;
  size_t weightHeight_;
  size_t weightWidth_;

public:
  explicit MKLPackedGemm(MatrixPtr weight) {
    weightHeight_ = weight->getHeight();
    weightWidth_ = weight->getWidth();
    weightPacked_ =
        cblas_sgemm_alloc(CblasBMatrix, 1, weightWidth_, weightHeight_);
    weightTPacked_ =
        cblas_sgemm_alloc(CblasBMatrix, 1, weightWidth_, weightHeight_);
    // Pack both W and W^T up front; compute() selects one via transW.
    cblas_sgemm_pack(CblasRowMajor,
                     CblasBMatrix,
                     CblasNoTrans,
                     1,
                     weightWidth_,
                     weightHeight_,
                     1.0,
                     weight->getData(),
                     weightWidth_,
                     weightPacked_);
    cblas_sgemm_pack(CblasRowMajor,
                     CblasBMatrix,
                     CblasTrans,
                     1,
                     weightWidth_,
                     weightHeight_,
                     1.0,
                     weight->getData(),
                     weightWidth_,
                     weightTPacked_);
  }

  /// Computes batch2 += batch1 * W (or batch1 * W^T when transW is true)
  /// using the pre-packed weight. The recurrent weight used by this layer
  /// is square (weightHeight_ == weightWidth_), so both branches pass the
  /// same dimensions and differ only in which packed copy they use.
  void compute(MatrixPtr batch2, MatrixPtr batch1, bool transW = false) {
    if (transW) {
      cblas_sgemm_compute(CblasRowMajor,
                          CblasNoTrans,
                          CblasPacked,
                          batch2->getHeight(),
                          weightWidth_,
                          weightHeight_,
                          batch1->getData(),
                          weightHeight_,
                          weightTPacked_,
                          weightWidth_,
                          1,
                          batch2->getData(),
                          weightWidth_);
    } else {
      cblas_sgemm_compute(CblasRowMajor,
                          CblasNoTrans,
                          CblasPacked,
                          batch2->getHeight(),
                          weightWidth_,
                          weightHeight_,
                          batch1->getData(),
                          weightHeight_,
                          weightPacked_,
                          weightWidth_,
                          1,
                          batch2->getData(),
                          weightWidth_);
    }
  }

  ~MKLPackedGemm() {
    cblas_sgemm_free(weightPacked_);
    cblas_sgemm_free(weightTPacked_);
  }
};

}  // namespace paddle
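Packing pays off when the same weight takes part in many GEMM calls, which is exactly the situation in the recurrent layer below: every time step multiplies by the same recurrent weight W. A minimal usage sketch follows; the helper function, the include path, and the shapes are illustrative assumptions, not part of this commit:

#include <vector>

#include "paddle/gserver/layers/MKLPackedGemm.h"  // assumed include path

namespace paddle {

// Illustrative only, not part of this commit: apply out_i += out_{i-1} * W
// over a sequence of time-step outputs, packing the weight exactly once.
void recurrenceSketch(MatrixPtr weight, std::vector<MatrixPtr>& out) {
  MKLPackedGemm packedGemm(weight);  // cblas_sgemm_pack runs here, once
  for (size_t i = 1; i < out.size(); ++i) {
    // cblas_sgemm_compute reuses the packed weight; no per-step repacking.
    packedGemm.compute(out[i] /*batch2*/, out[i - 1] /*batch1*/);
  }
}

}  // namespace paddle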
File diff suppressed because it is too large
@@ -0,0 +1,131 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <gflags/gflags.h>
#include "Layer.h"
#include "MKLPackedGemm.h"
#include "SequenceToBatch.h"
#include "paddle/utils/Stat.h"

DECLARE_bool(rnn_use_batch);

namespace paddle {

/**
 * @brief MKLPackedRecurrentLayer takes one input layer. Its output size is
 * the same as the input layer's.
 * For each sequence [start, end] it performs the following computation:
 * \f[
 * out_{i} = act(in_{i}) \ \ \text{for} \ i = start \\
 * out_{i} = act(in_{i} + out_{i-1} * W) \ \ \text{for} \ start < i <= end
 * \f]
 * If reversed is true, the order is reversed:
 * \f[
 * out_{i} = act(in_{i}) \ \ \text{for} \ i = end \\
 * out_{i} = act(in_{i} + out_{i+1} * W) \ \ \text{for} \ start <= i < end
 * \f]
 * There are two ways to compute the RNN: one sequence at a time, or by
 * reorganizing the input into batches and computing one batch at a time.
 * Users select between them with the rnn_use_batch flag.
 */
class MKLPackedRecurrentLayer : public Layer {
public:
  explicit MKLPackedRecurrentLayer(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forward(PassType passType) override;

  void backward(const UpdateCallback& callback) override;

  void resetState() override;

  void setState(LayerStatePtr state) override;

  LayerStatePtr getState() override;

protected:
  /**
   * @brief If the user does not set --rnn_use_batch=true, the RNN forward
   * pass is computed one sequence at a time by default.
   * @param batchSize Total number of words across all samples in this batch.
   * @param numSequences The number of samples.
   * @param starts The start position of each sample.
   */
  void forwardSequence(int batchSize, size_t numSequences, const int* starts);

  /**
   * @brief Compute the RNN forward pass for one sequence.
   * @param start The start position of this sequence (or sample).
   * @param length The length of this sequence (or sample), i.e. the number
   * of words in it.
   */
  void forwardOneSequence(int start, int length);

  /**
   * @brief Compute the RNN backward pass one sequence at a time.
   * @param batchSize Total number of words across all samples in this batch.
   * @param numSequences The number of samples.
   * @param starts The start position of each sample.
   */
  void backwardSequence(int batchSize, size_t numSequences, const int* starts);

  /**
   * @brief Compute the RNN backward pass for one sequence.
   * @param start The start position of this sequence (or sample).
   * @param length The length of this sequence (or sample), i.e. the number
   * of words in it.
   */
  void backwardOneSequence(int start, int length);

  /**
   * @brief Reorganize the input into batches and compute the RNN forward
   * pass one batch at a time. The batch layout is converted back to the
   * sequence layout after the forward pass finishes. See the
   * SequenceToBatch class for details on the batch layout.
   * @param batchSize Total number of words across all samples in this batch.
   * @param numSequences The number of samples.
   * @param starts The start position of each sample.
   */
  void forwardBatch(int batchSize, size_t numSequences, const int* starts);

  /**
   * @brief Reorganize the input into batches and compute the RNN backward
   * pass one batch at a time.
   * @param batchSize Total number of words across all samples in this batch.
   * @param numSequences The number of samples.
   * @param starts The start position of each sample.
   */
  void backwardBatch(int batchSize, size_t numSequences, const int* starts);

protected:
  std::unique_ptr<Weight> weight_;
  std::unique_ptr<Weight> bias_;

  /// frameOutput_[i] is used to hold the i-th sample of output_.
  std::vector<Argument> frameOutput_;
  MatrixPtr prevOutput_;
  /// Whether the RNN is computed in reverse order.
  bool reversed_;
  /// When computing batch by batch, batchValue_ saves the reorganized
  /// input value.
  std::unique_ptr<SequenceToBatch> batchValue_;
  /// When computing batch by batch, batchGrad_ saves the gradient with
  /// respect to the reorganized input value.
  std::unique_ptr<SequenceToBatch> batchGrad_;

  /// Packed copies of the recurrent weight, shared by the forward and
  /// backward GEMM calls.
  std::unique_ptr<MKLPackedGemm> sgemm_packed_;
};

}  // namespace paddle
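The implementation file is not shown here (its diff is suppressed above), so the following is only a hedged sketch of how forward() might dispatch between the two code paths described in the class comment. The member functions are the ones declared above; the body and the Argument accessors are assumptions modeled on Paddle's existing RecurrentLayer:

// Sketch only; the real definition lives in the suppressed implementation
// file. FLAGS_rnn_use_batch is the flag declared via DECLARE_bool above.
void MKLPackedRecurrentLayer::forward(PassType passType) {
  Layer::forward(passType);
  const Argument& input = getInput(0);
  int batchSize = input.getBatchSize();
  size_t numSequences = input.getNumSequences();
  const int* starts = input.sequenceStartPositions->getData(false);

  if (FLAGS_rnn_use_batch) {
    // Batch path: SequenceToBatch regroups words so that each GEMM call
    // covers many sequences at once.
    forwardBatch(batchSize, numSequences, starts);
  } else {
    // Sequence path: walk each sequence [starts[i], starts[i + 1]) in turn.
    forwardSequence(batchSize, numSequences, starts);
  }
}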