From 53b0e427092219b402f0ed6fab4235c3b70fdc7c Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 17 Aug 2017 16:19:59 +0800 Subject: [PATCH 1/5] Add EigenGemm. --- paddle/function/EigenGemm.cpp | 92 ++++++++++++++++++++++++++++++ paddle/function/GemmFunctor.cpp | 85 ++++++++++++++++++++++++++++ paddle/function/GemmFunctor.h | 99 +++++++++++---------------------- 3 files changed, 211 insertions(+), 65 deletions(-) create mode 100644 paddle/function/EigenGemm.cpp create mode 100644 paddle/function/GemmFunctor.cpp diff --git a/paddle/function/EigenGemm.cpp b/paddle/function/EigenGemm.cpp new file mode 100644 index 0000000000..0b4220fcbe --- /dev/null +++ b/paddle/function/EigenGemm.cpp @@ -0,0 +1,92 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "unsupported/Eigen/CXX11/Tensor" + +namespace paddle { + +template +struct EigenBlasGemm { + typedef Eigen::TensorMap, + Eigen::Aligned> + Matrix; + + static void compute(const bool transA, + const bool transB, + const int M, + const int N, + const int K, + const T alpha, + const T* A, + const int lda, + const T* B, + const int ldb, + const T beta, + T* C, + const int ldc) { + Eigen::array sizeA; + if (transA) { + sizeA[0] = K; + sizeA[1] = M; + CHECK_EQ(M, lda); + } else { + sizeA[0] = M; + sizeA[1] = K; + CHECK_EQ(K, lda); + } + Eigen::array sizeB; + if (transB) { + sizeB[0] = N; + sizeB[1] = K; + CHECK_EQ(K, ldb); + } else { + sizeB[0] = K; + sizeB[1] = N; + CHECK_EQ(N, ldb); + } + Eigen::array sizeC; + sizeC[0] = M; + sizeC[1] = N; + CHECK_EQ(N, ldc); + + const Matrix a(const_cast(A), sizeA); + const Matrix b(const_cast(B), sizeB); + Matrix c(C, sizeC); + + typedef typename Eigen::Tensor::DimensionPair DimPair; + Eigen::array dims; + dims[0] = DimPair(1, 0); + dims[0].first = transA ? 0 : 1; + dims[0].second = transB ? 1 : 0; + + Eigen::DefaultDevice device; + if (alpha == T(1) && beta == T(0)) { + c.device(device) = a.contract(b, dims); + } else if (alpha == T(1) && beta == T(1)) { + c.device(device) += a.contract(b, dims); + } else { + c.device(device) = + c.constant(alpha) * a.contract(b, dims) + c.constant(beta) * c; + } + } +}; + +#ifdef PADDLE_TYPE_DOUBLE +template class EigenBlasGemm; +#else +template class EigenBlasGemm; +#endif + +} // namespace paddle diff --git a/paddle/function/GemmFunctor.cpp b/paddle/function/GemmFunctor.cpp new file mode 100644 index 0000000000..8df9b884fe --- /dev/null +++ b/paddle/function/GemmFunctor.cpp @@ -0,0 +1,85 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "GemmFunctor.h" +#include "paddle/math/MathFunctions.h" + +namespace paddle { + +template +struct BlasGemm { + static void compute(const bool transA, + const bool transB, + const int M, + const int N, + const int K, + const T alpha, + const T* A, + const int lda, + const T* B, + const int ldb, + const T beta, + T* C, + const int ldc) { + gemm(transA == false ? CblasNoTrans : CblasTrans, + transB == false ? CblasNoTrans : CblasTrans, + M, + N, + K, + alpha, + A, + lda, + B, + ldb, + beta, + C, + ldc); + } +}; + +template +struct BlasGemm { + static void compute(const bool transA, + const bool transB, + const int M, + const int N, + const int K, + const T alpha, + const T* A, + const int lda, + const T* B, + const int ldb, + const T beta, + T* C, + const int ldc) { + hl_matrix_mul((T*)A, + transA == false ? HPPL_OP_N : HPPL_OP_T, + (T*)B, + transB == false ? HPPL_OP_N : HPPL_OP_T, + C, + M, + N, + K, + alpha, + beta, + lda, + ldb, + ldc); + } +}; + +template class BlasGemm; +template class BlasGemm; + +} // namespace paddle diff --git a/paddle/function/GemmFunctor.h b/paddle/function/GemmFunctor.h index d5db5cf5e7..0809953b4e 100644 --- a/paddle/function/GemmFunctor.h +++ b/paddle/function/GemmFunctor.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/math/MathFunctions.h" +#include "TensorType.h" namespace paddle { @@ -24,73 +24,42 @@ namespace paddle { // of MatMulFunction, we need to consider the reconstruction of hl_matrix_mul // interface. template -class GemmFunctor { -public: - void operator()(const CBLAS_TRANSPOSE transA, - const CBLAS_TRANSPOSE TransB, - const int M, - const int N, - const int K, - const T alpha, - const T* A, - const int lda, - const T* B, - const int ldb, - const T beta, - T* C, - const int ldc); +struct BlasGemm { + static void compute(const bool transA, + const bool transB, + const int M, + const int N, + const int K, + const T alpha, + const T* A, + const int lda, + const T* B, + const int ldb, + const T beta, + T* C, + const int ldc); }; +// TODO(hedaoyuan): Since the definition of the real type in the Paddle +// conflicts with the Eigen library, so compile the Eigen code can not +// include the Paddle header file. And need an EigenBlasGemm template class +// that does not contain the DeviceType parameter. +// I will fix this problem and merge BlasGemm and EigenBlasGemm into one. template -class GemmFunctor { -public: - void operator()(const CBLAS_TRANSPOSE transA, - const CBLAS_TRANSPOSE TransB, - const int M, - const int N, - const int K, - const T alpha, - const T* A, - const int lda, - const T* B, - const int ldb, - const T beta, - T* C, - const int ldc) { - gemm(transA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); - } -}; - -template -class GemmFunctor { -public: - void operator()(const CBLAS_TRANSPOSE transA, - const CBLAS_TRANSPOSE TransB, - const int M, - const int N, - const int K, - const T alpha, - const T* A, - const int lda, - const T* B, - const int ldb, - const T beta, - T* C, - const int ldc) { - hl_matrix_mul((T*)A, - transA == CblasNoTrans ? HPPL_OP_N : HPPL_OP_T, - (T*)B, - TransB == CblasNoTrans ? HPPL_OP_N : HPPL_OP_T, - C, - M, - N, - K, - alpha, - beta, - lda, - ldb, - ldc); - } +struct EigenBlasGemm { + static void compute(const bool transA, + const bool transB, + const int M, + const int N, + const int K, + const T alpha, + const T* A, + const int lda, + const T* B, + const int ldb, + const T beta, + T* C, + const int ldc); }; } // namespace paddle From ec2ba242060fc10b2045533fdcb410cfbd473cec Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 17 Aug 2017 16:22:30 +0800 Subject: [PATCH 2/5] Fix GemmConvFunction. --- paddle/function/CMakeLists.txt | 2 + paddle/function/GemmConvOp.cpp | 82 ++++++++++++++++------------------ 2 files changed, 41 insertions(+), 43 deletions(-) diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 7dfb6f61c5..9187294a49 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -4,6 +4,8 @@ file(GLOB cpp_files . *Op.cpp) list(APPEND h_files Function.h) list(APPEND cpp_files Function.cpp) list(APPEND cpp_files BufferArg.cpp) +list(APPEND cpp_files GemmFunctor.cpp) +list(APPEND cpp_files EigenGemm.cpp) if(WITH_GPU) file(GLOB cu_files . *OpGpu.cu) diff --git a/paddle/function/GemmConvOp.cpp b/paddle/function/GemmConvOp.cpp index 0ada4d70a0..f8cf4ebea8 100644 --- a/paddle/function/GemmConvOp.cpp +++ b/paddle/function/GemmConvOp.cpp @@ -85,7 +85,6 @@ public: } Im2ColFunctor im2col; - GemmFunctor gemm; size_t inputOffset = imShape.getElements(); size_t outputOffset = (outputChannels / groups_) * outputHeight * outputWidth; @@ -108,19 +107,19 @@ public: int M = outputChannels / groups_; int N = outputHeight * outputWidth; int K = inputChannels / groups_ * filterHeight * filterWidth; - gemm(CblasNoTrans, - CblasNoTrans, - M, - N, - K, - 1.0f, - filterData + g * filterOffset, - K, - colData, - N, - beta, - outputData + g * outputOffset, - N); + BlasGemm::compute(false, + false, + M, + N, + K, + 1.0f, + filterData + g * filterOffset, + K, + colData, + N, + beta, + outputData + g * outputOffset, + N); } inputData += inputChannels * inputHeight * inputWidth; outputData += outputChannels * outputHeight * outputWidth; @@ -188,8 +187,6 @@ public: } Col2ImFunctor col2im; - GemmFunctor gemm; - size_t inputOffset = imShape.getElements(); size_t outputOffset = (outputChannels / groups_) * outputHeight * outputWidth; @@ -205,19 +202,19 @@ public: colData = inputGrad + g * inputOffset; scale = 1.0f; } - gemm(CblasTrans, - CblasNoTrans, - M, - N, - K, - 1.0f, - filterData + g * filterOffset, - M, - outputGrad + g * outputOffset, - N, - scale, - colData, - N); + BlasGemm::compute(true, + false, + M, + N, + K, + 1.0f, + filterData + g * filterOffset, + M, + outputGrad + g * outputOffset, + N, + scale, + colData, + N); if (needIm2col) { col2im(inputGrad + g * inputOffset, imShape, @@ -299,7 +296,6 @@ public: } Im2ColFunctor im2col; - GemmFunctor gemm; size_t inputOffset = imShape.getElements(); size_t outputOffset = (outputChannels / groups_) * outputHeight * outputWidth; @@ -321,19 +317,19 @@ public: int M = outputChannels / groups_; int K = outputHeight * outputWidth; int N = inputChannels / groups_ * filterHeight * filterWidth; - gemm(CblasNoTrans, - CblasTrans, - M, - N, - K, - 1.0f, - outputGrad + g * outputOffset, - K, - colData, - K, - i == 0 ? beta : 1.0f, - filterGrad + g * filterOffset, - N); + BlasGemm::compute(false, + true, + M, + N, + K, + 1.0f, + outputGrad + g * outputOffset, + K, + colData, + K, + i == 0 ? beta : 1.0f, + filterGrad + g * filterOffset, + N); } inputData += inputChannels * inputHeight * inputWidth; outputGrad += outputChannels * outputHeight * outputWidth; From adcca2cc064182cd75809dd1e3d8c64329a0b0de Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 17 Aug 2017 16:40:38 +0800 Subject: [PATCH 3/5] Add PADDLE_USE_EIGEN_FOR_BLAS macro. --- CMakeLists.txt | 1 + cmake/configure.cmake | 4 ++++ paddle/function/GemmFunctor.cpp | 5 +++++ 3 files changed, 10 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index dcd1218a5b..28bbfd7916 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,6 +55,7 @@ option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF) option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) option(GLIDE_INSTALL "Download and install go dependencies " ON) option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) +option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) # CMAKE_BUILD_TYPE if(NOT CMAKE_BUILD_TYPE) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 209f9078a6..51c3b918cc 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -28,6 +28,10 @@ if(NOT WITH_TIMER) add_definitions(-DPADDLE_DISABLE_TIMER) endif(NOT WITH_TIMER) +if(USE_EIGEN_FOR_BLAS) + add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS) +endif(USE_EIGEN_FOR_BLAS) + if(NOT WITH_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER) endif(NOT WITH_PROFILER) diff --git a/paddle/function/GemmFunctor.cpp b/paddle/function/GemmFunctor.cpp index 8df9b884fe..dc83278d8e 100644 --- a/paddle/function/GemmFunctor.cpp +++ b/paddle/function/GemmFunctor.cpp @@ -32,6 +32,10 @@ struct BlasGemm { const T beta, T* C, const int ldc) { +#ifdef PADDLE_USE_EIGEN_FOR_BLAS + EigenBlasGemm::compute( + transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); +#else gemm(transA == false ? CblasNoTrans : CblasTrans, transB == false ? CblasNoTrans : CblasTrans, M, @@ -45,6 +49,7 @@ struct BlasGemm { beta, C, ldc); +#endif } }; From 6ba04dcd112e0caac46a7a829182ce00f301752f Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 17 Aug 2017 16:56:46 +0800 Subject: [PATCH 4/5] Remove the header files that do not need to be included. --- paddle/function/DepthwiseConvOp.cpp | 1 - paddle/function/DepthwiseConvOpGpu.cu | 1 - 2 files changed, 2 deletions(-) diff --git a/paddle/function/DepthwiseConvOp.cpp b/paddle/function/DepthwiseConvOp.cpp index 490e8d546c..2f3112fe65 100644 --- a/paddle/function/DepthwiseConvOp.cpp +++ b/paddle/function/DepthwiseConvOp.cpp @@ -14,7 +14,6 @@ limitations under the License. */ #include "DepthwiseConvOp.h" #include "ConvOp.h" -#include "GemmFunctor.h" namespace paddle { diff --git a/paddle/function/DepthwiseConvOpGpu.cu b/paddle/function/DepthwiseConvOpGpu.cu index 33463805cb..2d722dfcfc 100644 --- a/paddle/function/DepthwiseConvOpGpu.cu +++ b/paddle/function/DepthwiseConvOpGpu.cu @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "DepthwiseConvOp.h" -#include "GemmFunctor.h" #include "paddle/math/BaseMatrix.h" namespace paddle { From 430e0e418bb34d6a14662a29a3e6d5fb906c9610 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 21 Aug 2017 10:12:25 +0800 Subject: [PATCH 5/5] Follow comments. --- paddle/function/CMakeLists.txt | 4 +++- paddle/function/EigenGemm.cpp | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 9187294a49..c572a9d433 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -5,7 +5,9 @@ list(APPEND h_files Function.h) list(APPEND cpp_files Function.cpp) list(APPEND cpp_files BufferArg.cpp) list(APPEND cpp_files GemmFunctor.cpp) -list(APPEND cpp_files EigenGemm.cpp) +if(USE_EIGEN_FOR_BLAS) + list(APPEND cpp_files EigenGemm.cpp) +endif(USE_EIGEN_FOR_BLAS) if(WITH_GPU) file(GLOB cu_files . *OpGpu.cu) diff --git a/paddle/function/EigenGemm.cpp b/paddle/function/EigenGemm.cpp index 0b4220fcbe..674141ed39 100644 --- a/paddle/function/EigenGemm.cpp +++ b/paddle/function/EigenGemm.cpp @@ -77,8 +77,7 @@ struct EigenBlasGemm { } else if (alpha == T(1) && beta == T(1)) { c.device(device) += a.contract(b, dims); } else { - c.device(device) = - c.constant(alpha) * a.contract(b, dims) + c.constant(beta) * c; + c.device(device) = alpha * a.contract(b, dims) + beta * c; } } };