follow comments

branch: feature/design_of_v2_layer_converter
author: liaogang, 9 years ago
parent: 8cde2d119f
commit: f27fd9dc28

@@ -44,7 +44,6 @@ if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
 message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
 set(CBLAS_FOUND ON)
 if(${MKL_LAPACK_INC_DIR})
-add_definitions(-DPADDLE_USE_LAPACK)
 message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
 endif()
 return() # return file.
@@ -80,7 +79,6 @@ if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
 message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
 set(CBLAS_FOUND ON)
 if(ATLAS_CLAPACK_INC_DIR)
-add_definitions(-DPADDLE_USE_LAPACK)
 message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
 endif()
 return()
@@ -114,7 +112,6 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
 message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
 set(CBLAS_FOUND ON)
 if(OPENBLAS_LAPACKE_INC_DIR)
-add_definitions(-DPADDLE_USE_LAPACK)
 message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
 endif()
 return()

@@ -27,8 +27,6 @@ IF(NOT ${CBLAS_FOUND})
 SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
 ENDIF(WIN32)
-ADD_DEFINITIONS(-DPADDLE_USE_LAPACK)
 ExternalProject_Add(
 openblas
 ${EXTERNAL_PROJECT_LOG_ARGS}

@@ -40,18 +40,18 @@ public:
 namespace gpu {
 static __device__ Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static __device__ Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+} // namespace gpu
 #else
 namespace cpu {
 static Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+} // namespace cpu
 #ifdef __AVX__
 namespace avx {
 static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+} // namespace avx
 #endif
 #endif

@@ -16,7 +16,7 @@ limitations under the License. */
 #include <sys/time.h>
 #include "hl_cuda.h"
 #include "hl_thread.ph"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"
 namespace dynload {

@@ -17,7 +17,7 @@ limitations under the License. */
 #include <gflags/gflags.h>
 #include "hl_cuda_cudnn.ph"
 #include "hl_thread.ph"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"
 DEFINE_int32(cudnn_conv_workspace_limit_in_mb,

@@ -24,7 +24,7 @@ limitations under the License. */
 #include "hl_cuda.ph"
 #include "hl_thread.ph"
 #include "paddle/utils/Logging.h"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 // clang-format on
 namespace dynload {

@@ -14,7 +14,7 @@ limitations under the License. */
 #include "hl_warpctc_wrap.h"
 #include <mutex>
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"
 namespace dynload {

@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "BufferArg.h"
 #include <gtest/gtest.h>
+#include "BufferArg.h"
 #include "paddle/math/MemoryHandle.h"
 namespace paddle {

@@ -165,11 +165,11 @@ void CosSimBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad,
 real reciprocal_square_sum_x = 1.0f / square_sum_x;
 real reciprocal_square_sum_y = 1.0f / square_sum_y;
 for (size_t j = 0; j < dim; ++j) {
-prev_grad_x[j] +=
-out[i] * grad[i] * (prev_out_y[j] * reciprocal_xy -
+prev_grad_x[j] += out[i] * grad[i] *
+(prev_out_y[j] * reciprocal_xy -
 prev_out_x[j] * reciprocal_square_sum_x);
-prev_grad_y[j] +=
-out[i] * grad[i] * (prev_out_x[j] * reciprocal_xy -
+prev_grad_y[j] += out[i] * grad[i] *
+(prev_out_x[j] * reciprocal_xy -
 prev_out_y[j] * reciprocal_square_sum_y);
 }
 }

@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "Function.h"
 #include <gtest/gtest.h>
+#include "Function.h"
 #include "paddle/math/SparseMatrix.h"
 namespace paddle {

@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "TensorShape.h"
 #include <gtest/gtest.h>
+#include "TensorShape.h"
 namespace paddle {

@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "TensorType.h"
 #include <gtest/gtest.h>
+#include "TensorType.h"
 namespace paddle {

@@ -194,8 +194,8 @@ void PyDataProvider::fillSlotsByStr(const std::string& samples) {
 auto& slot = slots_[j];
 CHECK(SlotDef::INDEX >= slot.type || SlotDef::STRING == slot.type)
 << " Slot type:" << slot.type << " is out of range.";
-CHECK_GE(slot.type, SlotDef::VECTOR_DENSE) << " Slot type:" << slot.type
-<< " is out of range.";
+CHECK_GE(slot.type, SlotDef::VECTOR_DENSE)
+<< " Slot type:" << slot.type << " is out of range.";
 switch (slot.type) {
 case SlotDef::VECTOR_DENSE:
 fillDenseSlot(slot, data, dataEnd);

@@ -446,8 +446,8 @@ real AucEvaluator::evalImp(std::vector<Argument>& arguments) {
 for (size_t i = 0; i < insNum; ++i) {
 real value = outputD[pos];
 uint32_t binIdx = static_cast<uint32_t>(value * kBinNum_);
-CHECK(binIdx <= kBinNum_) << "bin index [" << binIdx
-<< "] out of range, predict value[" << value
+CHECK(binIdx <= kBinNum_)
+<< "bin index [" << binIdx << "] out of range, predict value[" << value
 << "]";
 real w = supportWeight ? weightD[i] : 1.0;
 if (labelD[i] == kNegativeLabel_) {

@@ -21,7 +21,6 @@ limitations under the License. */
 #include "MultiGradientMachine.h"
 #include "MultiNetwork.h"
 #include "NeuralNetwork.h"
-#include "NeuralNetwork.h"
 #include "ParallelNeuralNetwork.h"
 #include "hl_gpu.h"

@@ -263,8 +263,9 @@ void Layer::zeroGrad() {
 }
 void Layer::initNeedFlags() {
-auto initFlag = [this](
-bool& flag, bool (Layer::*flagQueryFunc)() const, ParameterType type) {
+auto initFlag = [this](bool& flag,
+bool (Layer::*flagQueryFunc)() const,
+ParameterType type) {
 flag = false;
 if (biasParameter_ && biasParameter_->hasType(type)) {
 flag = true;

@@ -292,8 +292,8 @@ void checkRecurrentLayer(LayerConfig layerConfig,
 TestRecurrentLayer<T> testGpu(layerConfig, true, gpuBatch);
 testCpu.init(batchSize);
 testGpu.init(batchSize);
-auto checkError = [](
-MatrixPtr cpu, MatrixPtr gpu, int numSequences, const char* str) {
+auto checkError =
+[](MatrixPtr cpu, MatrixPtr gpu, int numSequences, const char* str) {
 CpuMatrix check(gpu->getHeight(), gpu->getWidth());
 check.copyFrom(*gpu);
 int height = cpu->getHeight();
@@ -303,7 +303,8 @@ void checkRecurrentLayer(LayerConfig layerConfig,
 int count = 0;
 for (int i = 0; i < height; i++) {
 for (int j = 0; j < width; j++) {
-if (fabs(data1[i * width + j] - data2[i * width + j]) / numSequences >
+if (fabs(data1[i * width + j] - data2[i * width + j]) /
+numSequences >
 1e-4) {
 count++;
 }

@@ -15,7 +15,7 @@ limitations under the License. */
 #include "MathFunctions.h"
 #include "hl_matrix_apply.cuh"
 #include "hl_matrix_ops.cuh"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 namespace dynload {
@@ -41,24 +41,27 @@ void* lapack_dso_handle = nullptr;
 } __name; // struct DynLoad__##__name
 // clang-format off
-#ifdef PADDLE_USE_LAPACK
 #ifdef PADDLE_USE_ATLAS
-#define LAPACK_ROUTINE_EACH(__macro) \
-__macro(clapack_sgetrf) \
-__macro(clapack_dgetrf) \
-__macro(clapack_sgetri) \
-__macro(clapack_dgetri)
+#define PADDLE_SGETRF clapack_sgetrf
+#define PADDLE_DGETRF clapack_dgetrf
+#define PADDLE_SGETRI clapack_sgetri
+#define PADDLE_DGETRI clapack_dgetri
 #else
-#define LAPACK_ROUTINE_EACH(__macro) \
-__macro(LAPACKE_sgetrf) \
-__macro(LAPACKE_dgetrf) \
-__macro(LAPACKE_sgetri) \
-__macro(LAPACKE_dgetri)
-#endif
-LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP)
+#define PADDLE_SGETRF LAPACKE_sgetrf
+#define PADDLE_DGETRF LAPACKE_dgetrf
+#define PADDLE_SGETRI LAPACKE_sgetri
+#define PADDLE_DGETRI LAPACKE_dgetri
 #endif
+#define LAPACK_ROUTINE_EACH(__macro) \
+__macro(PADDLE_SGETRF) \
+__macro(PADDLE_DGETRF) \
+__macro(PADDLE_SGETRI) \
+__macro(PADDLE_DGETRI)
 // clang-format on
+LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP)
 } // namespace dynload
 namespace paddle {
@@ -130,16 +133,7 @@ int getrf<float>(const CBLAS_ORDER order,
 float* A,
 const int lda,
 int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-return dynload::clapack_sgetrf(order, M, N, A, lda, ipiv);
-#else
-return dynload::LAPACKE_sgetrf(order, M, N, A, lda, ipiv);
-#endif
-#else
-LOG(FATAL) << "Not implemented";
-#endif
-return 0;
+return dynload::PADDLE_SGETRF(order, M, N, A, lda, ipiv);
 }
 template <>
@@ -149,16 +143,7 @@ int getrf<double>(const CBLAS_ORDER order,
 double* A,
 const int lda,
 int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-return dynload::clapack_dgetrf(order, M, N, A, lda, ipiv);
-#else
-return dynload::LAPACKE_dgetrf(order, M, N, A, lda, ipiv);
-#endif
-#else
-LOG(FATAL) << "Not implemented";
-#endif
-return 0;
+return dynload::PADDLE_DGETRF(order, M, N, A, lda, ipiv);
 }
 template <>
@@ -167,16 +152,7 @@ int getri<float>(const CBLAS_ORDER order,
 float* A,
 const int lda,
 const int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-return dynload::clapack_sgetri(order, N, A, lda, ipiv);
-#else
-return dynload::LAPACKE_sgetri(order, N, A, lda, ipiv);
-#endif
-#else
-LOG(FATAL) << "Not implemented";
-#endif
-return 0;
+return dynload::PADDLE_SGETRI(order, N, A, lda, ipiv);
 }
 template <>
@@ -185,15 +161,7 @@ int getri<double>(const CBLAS_ORDER order,
 double* A,
 const int lda,
 const int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-return dynload::clapack_dgetri(order, N, A, lda, ipiv);
-#else
-return dynload::LAPACKE_dgetri(order, N, A, lda, ipiv);
-#endif
-#else
-LOG(FATAL) << "Not implemented";
-#endif
+return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv);
 return 0;
 }
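The getrf/getri hunks above replace the compile-time PADDLE_USE_LAPACK branches with dynload:: wrappers generated by DYNAMIC_LOAD_LAPACK_WRAP, so each LAPACK routine is resolved from a shared library at first use instead of being linked at build time. Below is a minimal C++ sketch of that dlopen/dlsym idiom; the dyn_LAPACKE_sgetrf name and the simplified stand-in for GetLapackDsoHandle are illustrative assumptions, not the commit's actual wrapper macro.

// Minimal sketch of the lazy-loading wrapper idiom (assumed, simplified).
#include <dlfcn.h>
#include <mutex>

static void* lapack_dso_handle = nullptr;
static std::once_flag lapack_dso_flag;

// Stand-in for the helper declared in paddle/utils/DynamicLoader.h; the real
// helper searches several candidate library names and paths.
static void GetLapackDsoHandle(void** handle) {
  *handle = dlopen("liblapacke.so", RTLD_LAZY | RTLD_GLOBAL);
}

// Resolve LAPACKE_sgetrf on first call and forward to it, making LAPACK a
// runtime (dlopen) dependency rather than a link-time one.
static int dyn_LAPACKE_sgetrf(
    int order, int m, int n, float* a, int lda, int* ipiv) {
  using Fn = int (*)(int, int, int, float*, int, int*);
  std::call_once(lapack_dso_flag,
                 [] { GetLapackDsoHandle(&lapack_dso_handle); });
  Fn fn = reinterpret_cast<Fn>(dlsym(lapack_dso_handle, "LAPACKE_sgetrf"));
  return fn(order, m, n, a, lda, ipiv);
}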

@@ -17,14 +17,11 @@ limitations under the License. */
 #ifdef PADDLE_USE_MKL
 #include <mkl.h>
-#ifdef PADDLE_USE_LAPACK
 #include <mkl_lapacke.h>
-#endif
 #else
 extern "C" {
 #include <cblas.h>
 }
-#ifdef PADDLE_USE_LAPACK
 #ifdef PADDLE_USE_ATLAS
 extern "C" {
 #include <clapack.h>
@@ -33,7 +30,6 @@ extern "C" {
 #include <lapacke.h>
 #endif
 #endif
-#endif
 #include <cmath>

@@ -174,8 +174,10 @@ void CpuMatrix::mulByBitCode(size_t numClasses,
 const IVector& codes,
 const Matrix& weight,
 const Matrix& input) {
-auto op = [](
-real& t, const real* weightRow, const real* inputRow, size_t inputDim) {
+auto op = [](real& t,
+const real* weightRow,
+const real* inputRow,
+size_t inputDim) {
 real sum = 0;
 for (size_t k = 0; k < inputDim; ++k) {
 sum += weightRow[k] * inputRow[k];
@@ -193,8 +195,8 @@ void CpuMatrix::mulByBitCodeBackwardWeight(size_t numClasses,
 const IVector& codes,
 Matrix& weight,
 const Matrix& input) {
-auto op = [](
-const real t, real* weightRow, const real* inputRow, size_t inputDim) {
+auto op =
+[](const real t, real* weightRow, const real* inputRow, size_t inputDim) {
 for (size_t k = 0; k < inputDim; ++k) {
 weightRow[k] += t * inputRow[k];
 }
@@ -210,8 +212,8 @@ void CpuMatrix::mulByBitCodeBackwardError(size_t numClasses,
 const IVector& codes,
 const Matrix& weight,
 Matrix& input) {
-auto op = [](
-const real t, const real* weightRow, real* inputRow, size_t inputDim) {
+auto op =
+[](const real t, const real* weightRow, real* inputRow, size_t inputDim) {
 for (size_t k = 0; k < inputDim; ++k) {
 inputRow[k] += t * weightRow[k];
 }

@@ -183,8 +183,8 @@ void TensorCheck(AssertEq compare,
 template <typename AssertEq>
 void TensorCheck(AssertEq compare, real args1, real args2) {
-EXPECT_EQ(compare(args1, args2), true) << "[Test error] args1 = " << args1
-<< ", args2 = " << args2;
+EXPECT_EQ(compare(args1, args2), true)
+<< "[Test error] args1 = " << args1 << ", args2 = " << args2;
 }
 template <typename AssertEq>

@@ -126,13 +126,13 @@ TEST(SIMDFunction, decayL1_WithLR) {
 typedef std::function<void(float*, float*, float*, float, size_t)>
 DecayL1MethodType;
-DecayL1MethodType naive = [](
-float* d, float* s, float* lr, float l, size_t len) {
+DecayL1MethodType naive =
+[](float* d, float* s, float* lr, float l, size_t len) {
 paddle::simd::naive::decayL1<float>(d, s, lr, l, len);
 };
-DecayL1MethodType simd = [](
-float* d, float* s, float* lr, float l, size_t len) {
+DecayL1MethodType simd =
+[](float* d, float* s, float* lr, float l, size_t len) {
 paddle::simd::decayL1<float>(d, s, lr, l, len);
 };

@@ -21,6 +21,7 @@ limitations under the License. */
 #include "paddle/math/Matrix.h"
 #include "paddle/math/SparseMatrix.h"
 #include "paddle/testing/TestUtil.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Stat.h"
 #include "paddle/utils/Util.h"
@@ -235,10 +236,15 @@ TEST(Matrix, unary) {
 testMatrixTranspose(height, width);
 testMatrixRotate(height, width);
 }
-// inverse
-#ifdef PADDLE_USE_LAPACK
+// inverse matrix
+void** dso_handler = nullptr;
+GetLapackDsoHandle(dso_handler);
+if (nullptr == *dso_handler) {
+LOG(WARNING) << "Failed to find liblapack.so, please specify its path "
+"using LD_LIBRARY_PATH.";
+} else {
 testMatrixInverse(height);
-#endif
+}
 }
 }

@@ -395,7 +395,8 @@ void Argument::concat(const std::vector<Argument>& args,
 tmpMatrix->copyFrom(*src, stream);
 };
-auto copyIds = [batchSize, stream](
+auto copyIds =
+[batchSize, stream](
 IVectorPtr& dst, const IVectorPtr& src, int startRow, bool useGpu) {
 if (!src) {
 dst.reset();
@@ -405,7 +406,8 @@ void Argument::concat(const std::vector<Argument>& args,
 dst->subVec(startRow, src->getSize())->copyFrom(*src, stream);
 };
-auto copyStrs = [batchSize, stream](
+auto copyStrs =
+[batchSize, stream](
 SVectorPtr& dst, const SVectorPtr& src, int startRow, bool useGpu) {
 if (!src) {
 dst.reset();

@@ -155,8 +155,9 @@ ParameterOptimizer::TraverseCallback AverageOptimizer::restore() {
 return nullptr;
 }
-return [](
-const VectorPtr vecs[], const ParameterConfig& config, size_t sparseId) {
+return [](const VectorPtr vecs[],
+const ParameterConfig& config,
+size_t sparseId) {
 vecs[PARAMETER_VALUE]->copyFrom(*vecs[PARAMETER_GRADIENT]);
 vecs[PARAMETER_GRADIENT]->zeroMem();
 };

Some files were not shown because too many files have changed in this diff.