follow comments

8 years ago · f27fd9dc28
parent 8cde2d119f
commit f27fd9dc28
46 changed files with 222 additions and 251 deletions
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@ -44,7 +44,6 @@ if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
  message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
  set(CBLAS_FOUND ON)
  if(${MKL_LAPACK_INC_DIR})
-    add_definitions(-DPADDLE_USE_LAPACK)
    message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
  endif()
  return() # return file.
@ -80,7 +79,6 @@ if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
  message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
  set(CBLAS_FOUND ON)
  if(ATLAS_CLAPACK_INC_DIR)
-    add_definitions(-DPADDLE_USE_LAPACK)
    message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
  endif()
  return()
@ -114,7 +112,6 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
  message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
  set(CBLAS_FOUND ON)
  if(OPENBLAS_LAPACKE_INC_DIR)
-    add_definitions(-DPADDLE_USE_LAPACK)
    message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
  endif()
  return()
--- a/cmake/external/openblas.cmake
+++ b/cmake/external/openblas.cmake
@ -27,8 +27,6 @@ IF(NOT ${CBLAS_FOUND})
        SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
    ENDIF(WIN32)

-    ADD_DEFINITIONS(-DPADDLE_USE_LAPACK)
-
    ExternalProject_Add(
        openblas
        ${EXTERNAL_PROJECT_LOG_ARGS}
--- a/paddle/cuda/include/hl_activation_functions.h
+++ b/paddle/cuda/include/hl_activation_functions.h
@ -40,18 +40,18 @@ public:
 namespace gpu {
 static __device__ Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static __device__ Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+}  // namespace gpu
 #else
 namespace cpu {
 static Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+}  // namespace cpu

 #ifdef __AVX__
 namespace avx {
 static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION;
 static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION;
-}
+}  // namespace avx
 #endif
 #endif

--- a/paddle/cuda/include/hl_cnn.h
+++ b/paddle/cuda/include/hl_cnn.h
@ -273,23 +273,23 @@ extern void hl_bilinear_forward(const real* inData,
                                const real ratioW);

 /**
-* @brief   Bilinear interpolation backward.
-*
-* @param[out]  inGrad      input gradient.
-* @param[in]   inImgH      input image height.
-* @param[in]   inImgW      input image width.
-* @param[in]   inputH      input batchSize.
-* @param[in]   inputW      input image data dim.
-* @param[in]   outGrad     output gradient.
-* @param[in]   outImgH     output image height.
-* @param[in]   outImgW     output image width.
-* @param[in]   outputH     output batchSize.
-* @param[in]   outputW     output image data dim.
-* @param[in]   numChannels number of channels.
-* @param[in]   ratioH      inImgH / outImgH.
-* @param[in]   ratioW      inImgW / outImgW.
-*
-*/
+ * @brief   Bilinear interpolation backward.
+ *
+ * @param[out]  inGrad      input gradient.
+ * @param[in]   inImgH      input image height.
+ * @param[in]   inImgW      input image width.
+ * @param[in]   inputH      input batchSize.
+ * @param[in]   inputW      input image data dim.
+ * @param[in]   outGrad     output gradient.
+ * @param[in]   outImgH     output image height.
+ * @param[in]   outImgW     output image width.
+ * @param[in]   outputH     output batchSize.
+ * @param[in]   outputW     output image data dim.
+ * @param[in]   numChannels number of channels.
+ * @param[in]   ratioH      inImgH / outImgH.
+ * @param[in]   ratioW      inImgW / outImgW.
+ *
+ */
 extern void hl_bilinear_backward(real* inGrad,
                                 const size_t inImgH,
                                 const size_t inImgW,
--- a/paddle/cuda/src/hl_cuda_cublas.cc
+++ b/paddle/cuda/src/hl_cuda_cublas.cc
@ -16,7 +16,7 @@ limitations under the License. */
 #include <sys/time.h>
 #include "hl_cuda.h"
 #include "hl_thread.ph"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"

 namespace dynload {
--- a/paddle/cuda/src/hl_cuda_cudnn.cc
+++ b/paddle/cuda/src/hl_cuda_cudnn.cc
@ -17,7 +17,7 @@ limitations under the License. */
 #include <gflags/gflags.h>
 #include "hl_cuda_cudnn.ph"
 #include "hl_thread.ph"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"

 DEFINE_int32(cudnn_conv_workspace_limit_in_mb,
--- a/paddle/cuda/src/hl_cuda_device.cc
+++ b/paddle/cuda/src/hl_cuda_device.cc
@ -24,7 +24,7 @@ limitations under the License. */
 #include "hl_cuda.ph"
 #include "hl_thread.ph"
 #include "paddle/utils/Logging.h"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 // clang-format on

 namespace dynload {
--- a/paddle/cuda/src/hl_warpctc_wrap.cc
+++ b/paddle/cuda/src/hl_warpctc_wrap.cc
@ -14,7 +14,7 @@ limitations under the License. */

 #include "hl_warpctc_wrap.h"
 #include <mutex>
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"
 #include "paddle/utils/Logging.h"

 namespace dynload {
--- a/paddle/function/BufferArgTest.cpp
+++ b/paddle/function/BufferArgTest.cpp
@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "BufferArg.h"
 #include <gtest/gtest.h>
+#include "BufferArg.h"
 #include "paddle/math/MemoryHandle.h"

 namespace paddle {
--- a/paddle/function/CosSimOp.cpp
+++ b/paddle/function/CosSimOp.cpp
@ -165,12 +165,12 @@ void CosSimBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad,
      real reciprocal_square_sum_x = 1.0f / square_sum_x;
      real reciprocal_square_sum_y = 1.0f / square_sum_y;
      for (size_t j = 0; j < dim; ++j) {
-        prev_grad_x[j] +=
-            out[i] * grad[i] * (prev_out_y[j] * reciprocal_xy -
-                                prev_out_x[j] * reciprocal_square_sum_x);
-        prev_grad_y[j] +=
-            out[i] * grad[i] * (prev_out_x[j] * reciprocal_xy -
-                                prev_out_y[j] * reciprocal_square_sum_y);
+        prev_grad_x[j] += out[i] * grad[i] *
+                          (prev_out_y[j] * reciprocal_xy -
+                           prev_out_x[j] * reciprocal_square_sum_x);
+        prev_grad_y[j] += out[i] * grad[i] *
+                          (prev_out_x[j] * reciprocal_xy -
+                           prev_out_y[j] * reciprocal_square_sum_y);
      }
    }
  }
--- a/paddle/function/FunctionTest.cpp
+++ b/paddle/function/FunctionTest.cpp
@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "Function.h"
 #include <gtest/gtest.h>
+#include "Function.h"
 #include "paddle/math/SparseMatrix.h"

 namespace paddle {
--- a/paddle/function/MulOpTest.cpp
+++ b/paddle/function/MulOpTest.cpp
@ -74,9 +74,9 @@ TEST(MulOp, DDDMatrixMul) {
 }

 /**
-  * C += A * B, B, C dense, A sparse
-  * dense = sparse * dense
-  */
+ * C += A * B, B, C dense, A sparse
+ * dense = sparse * dense
+ */
 void testFuncDSparseDMatrix(
    size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
  real scaleT = 1.0;
@ -119,9 +119,9 @@ TEST(MuLOp, DSparseDMul) {
 }

 /**
-  * C += A * B, A, C dense, B sparse
-  * dense = dense * sparse
-  */
+ * C += A * B, A, C dense, B sparse
+ * dense = dense * sparse
+ */
 void testFuncDDSparseMatrix(
    size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
  real scaleT = 1.0;
@ -165,9 +165,9 @@ TEST(MulOp, DDSparseMul) {
 }

 /**
-  * C += A * B, A sparse, B, C dense
-  * sparse = dense * dense
-  */
+ * C += A * B, A sparse, B, C dense
+ * sparse = dense * dense
+ */
 void testFuncSparseDDMatrix(
    size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
  real scaleT = 1.0;
--- a/paddle/function/TensorShapeTest.cpp
+++ b/paddle/function/TensorShapeTest.cpp
@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "TensorShape.h"
 #include <gtest/gtest.h>
+#include "TensorShape.h"

 namespace paddle {

--- a/paddle/function/TensorTypeTest.cpp
+++ b/paddle/function/TensorTypeTest.cpp
@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "TensorType.h"
 #include <gtest/gtest.h>
+#include "TensorType.h"

 namespace paddle {

--- a/paddle/gserver/dataproviders/PyDataProvider.cpp
+++ b/paddle/gserver/dataproviders/PyDataProvider.cpp
@ -194,8 +194,8 @@ void PyDataProvider::fillSlotsByStr(const std::string& samples) {
    auto& slot = slots_[j];
    CHECK(SlotDef::INDEX >= slot.type || SlotDef::STRING == slot.type)
        << " Slot type:" << slot.type << " is out of range.";
-    CHECK_GE(slot.type, SlotDef::VECTOR_DENSE) << " Slot type:" << slot.type
-                                               << " is out of range.";
+    CHECK_GE(slot.type, SlotDef::VECTOR_DENSE)
+        << " Slot type:" << slot.type << " is out of range.";
    switch (slot.type) {
      case SlotDef::VECTOR_DENSE:
        fillDenseSlot(slot, data, dataEnd);
--- a/paddle/gserver/evaluators/Evaluator.cpp
+++ b/paddle/gserver/evaluators/Evaluator.cpp
@ -446,9 +446,9 @@ real AucEvaluator::evalImp(std::vector<Argument>& arguments) {
  for (size_t i = 0; i < insNum; ++i) {
    real value = outputD[pos];
    uint32_t binIdx = static_cast<uint32_t>(value * kBinNum_);
-    CHECK(binIdx <= kBinNum_) << "bin index [" << binIdx
-                              << "] out of range, predict value[" << value
-                              << "]";
+    CHECK(binIdx <= kBinNum_)
+        << "bin index [" << binIdx << "] out of range, predict value[" << value
+        << "]";
    real w = supportWeight ? weightD[i] : 1.0;
    if (labelD[i] == kNegativeLabel_) {
      statNeg_[binIdx] += w;
--- a/paddle/gserver/gradientmachines/GradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/GradientMachine.cpp
@ -21,7 +21,6 @@ limitations under the License. */
 #include "MultiGradientMachine.h"
 #include "MultiNetwork.h"
 #include "NeuralNetwork.h"
-#include "NeuralNetwork.h"
 #include "ParallelNeuralNetwork.h"
 #include "hl_gpu.h"

--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
@ -637,7 +637,7 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
 /* create scattered id infomation for all realLayer of inFrameLines one time.
 * If hasSubseq, will also create scattered sequenceStartPositions infomation
 * for all realLayer of inFrameLines one time.
-*/
+ */

 void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
                                                 const Argument& input,
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
@ -107,18 +107,18 @@ public:
      DropCallback;

  /**
-    * @brief NormOrDropNodeCallback
-    *
-    * Normalize a path's probabilities or just drop it by modifying path.logProb
-    *
-    * The first parameter is sequence index in a batch
-    *
-    * The second parameter is path.ids
-    *
-    * The third parameter is probabilites for each node in this path.
-    *
-    * The fourth parameter is the probability of the whole path.
-    */
+   * @brief NormOrDropNodeCallback
+   *
+   * Normalize a path's probabilities or just drop it by modifying path.logProb
+   *
+   * The first parameter is sequence index in a batch
+   *
+   * The second parameter is path.ids
+   *
+   * The third parameter is probabilities for each node in this path.
+   *
+   * The fourth parameter is the probability of the whole path.
+   */
  typedef std::function<void(
      int seqId, const std::vector<int>&, std::vector<real>&, real*)>
      NormOrDropNodeCallback;
@ -348,9 +348,9 @@ protected:
  int targetInfoInlinkId_;

  /* create scattered id infomation for all realLayer of inFrameLines one time.
-  *  If hasSubseq, will also create scattered sequenceStartPositions infomation
-  *  for all realLayer of inFrameLines one time.
-  */
+   *  If hasSubseq, will also create scattered sequenceStartPositions infomation
+   *  for all realLayer of inFrameLines one time.
+   */
  void createInFrameInfo(int inlinks_id,
                         const Argument& input,
                         PassType passType);
--- a/paddle/gserver/layers/Layer.cpp
+++ b/paddle/gserver/layers/Layer.cpp
@ -263,8 +263,9 @@ void Layer::zeroGrad() {
 }

 void Layer::initNeedFlags() {
-  auto initFlag = [this](
-      bool& flag, bool (Layer::*flagQueryFunc)() const, ParameterType type) {
+  auto initFlag = [this](bool& flag,
+                         bool (Layer::*flagQueryFunc)() const,
+                         ParameterType type) {
    flag = false;
    if (biasParameter_ && biasParameter_->hasType(type)) {
      flag = true;
--- a/paddle/gserver/layers/Layer.h
+++ b/paddle/gserver/layers/Layer.h
@ -106,9 +106,9 @@ protected:

 public:
  /**
-    * Wait until all input value ready.
-    * Called before Layer::forward() function.
-    */
+   * Wait until all input value ready.
+   * Called before Layer::forward() function.
+   */
  virtual void waitInputValue();

  /**
@ -118,9 +118,9 @@ public:
  virtual void copyOutputToOtherDevice();

  /**
-    * Wait until all output grad ready and merge them to output_.grad.
-    * Called before Layer::backward() function.
-    */
+   * Wait until all output grad ready and merge them to output_.grad.
+   * Called before Layer::backward() function.
+   */
  virtual void waitAndMergeOutputGrad();

  /**
--- a/paddle/gserver/layers/RotateLayer.h
+++ b/paddle/gserver/layers/RotateLayer.h
@ -29,7 +29,7 @@ namespace paddle {
 *
 * The config file api is rotate_layer
 *
-*/
+ */

 class RotateLayer : public Layer {
 public:
--- a/paddle/gserver/layers/SequencePoolLayer.cpp
+++ b/paddle/gserver/layers/SequencePoolLayer.cpp
@ -60,7 +60,7 @@ void SequencePoolLayer::forward(PassType passType) {
   * thus, in this case, output_ has no sequenceStartPositions.
   * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
   * case, we should compute the new sequenceStartPositions.
-  */
+   */
  if (type_) {
    CHECK(input.subSequenceStartPositions)
        << "when trans_type = seq, input must hasSubseq";
--- a/paddle/gserver/tests/test_RecurrentLayer.cpp
+++ b/paddle/gserver/tests/test_RecurrentLayer.cpp
@ -292,26 +292,27 @@ void checkRecurrentLayer(LayerConfig layerConfig,
  TestRecurrentLayer<T> testGpu(layerConfig, true, gpuBatch);
  testCpu.init(batchSize);
  testGpu.init(batchSize);
-  auto checkError = [](
-      MatrixPtr cpu, MatrixPtr gpu, int numSequences, const char* str) {
-    CpuMatrix check(gpu->getHeight(), gpu->getWidth());
-    check.copyFrom(*gpu);
-    int height = cpu->getHeight();
-    int width = cpu->getWidth();
-    const real* data1 = cpu->getData();
-    const real* data2 = check.getData();
-    int count = 0;
-    for (int i = 0; i < height; i++) {
-      for (int j = 0; j < width; j++) {
-        if (fabs(data1[i * width + j] - data2[i * width + j]) / numSequences >
-            1e-4) {
-          count++;
+  auto checkError =
+      [](MatrixPtr cpu, MatrixPtr gpu, int numSequences, const char* str) {
+        CpuMatrix check(gpu->getHeight(), gpu->getWidth());
+        check.copyFrom(*gpu);
+        int height = cpu->getHeight();
+        int width = cpu->getWidth();
+        const real* data1 = cpu->getData();
+        const real* data2 = check.getData();
+        int count = 0;
+        for (int i = 0; i < height; i++) {
+          for (int j = 0; j < width; j++) {
+            if (fabs(data1[i * width + j] - data2[i * width + j]) /
+                    numSequences >
+                1e-4) {
+              count++;
+            }
+          }
        }
-      }
-    }
-    EXPECT_EQ(count, 0) << "[" << str << "]"
-                        << "There are " << count << " different element.";
-  };
+        EXPECT_EQ(count, 0) << "[" << str << "]"
+                            << "There are " << count << " different element.";
+      };
  T* cpuLayer = dynamic_cast<T*>(testCpu.testLayer_.get());
  T* gpuLayer = dynamic_cast<T*>(testGpu.testLayer_.get());

--- a/paddle/math/MathFunctions.cpp
+++ b/paddle/math/MathFunctions.cpp
@ -15,7 +15,7 @@ limitations under the License. */
 #include "MathFunctions.h"
 #include "hl_matrix_apply.cuh"
 #include "hl_matrix_ops.cuh"
-#include "paddle/utils/DynamicLoad.h"
+#include "paddle/utils/DynamicLoader.h"

 namespace dynload {

@ -32,7 +32,7 @@ void* lapack_dso_handle = nullptr;
 #define DYNAMIC_LOAD_LAPACK_WRAP(__name)                                       \
  struct DynLoad__##__name {                                                   \
    template <typename... Args>                                                \
-    auto operator()(Args... args)->decltype(__name(args...)) {                 \
+    auto operator()(Args... args) -> decltype(__name(args...)) {               \
      using lapack_func = decltype(__name(args...)) (*)(Args...);              \
      std::call_once(lapack_dso_flag, GetLapackDsoHandle, &lapack_dso_handle); \
      void* p_##__name = dlsym(lapack_dso_handle, #__name);                    \
@ -41,24 +41,27 @@ void* lapack_dso_handle = nullptr;
  } __name;  // struct DynLoad__##__name

 // clang-format off
-#ifdef PADDLE_USE_LAPACK
 #ifdef PADDLE_USE_ATLAS
-  #define LAPACK_ROUTINE_EACH(__macro)        \
-    __macro(clapack_sgetrf)                   \
-    __macro(clapack_dgetrf)                   \
-    __macro(clapack_sgetri)                   \
-    __macro(clapack_dgetri)
+  #define  PADDLE_SGETRF  clapack_sgetrf
+  #define  PADDLE_DGETRF  clapack_dgetrf
+  #define  PADDLE_SGETRI  clapack_sgetri
+  #define  PADDLE_DGETRI  clapack_dgetri
 #else
-  #define LAPACK_ROUTINE_EACH(__macro)        \
-    __macro(LAPACKE_sgetrf)                   \
-    __macro(LAPACKE_dgetrf)                   \
-    __macro(LAPACKE_sgetri)                   \
-    __macro(LAPACKE_dgetri)
-#endif
-LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP)
-#endif
+  #define  PADDLE_SGETRF  LAPACKE_sgetrf
+  #define  PADDLE_DGETRF  LAPACKE_dgetrf
+  #define  PADDLE_SGETRI  LAPACKE_sgetri
+  #define  PADDLE_DGETRI  LAPACKE_dgetri
+#endif
+#define LAPACK_ROUTINE_EACH(__macro)       \
+  __macro(PADDLE_SGETRF)                   \
+  __macro(PADDLE_DGETRF)                   \
+  __macro(PADDLE_SGETRI)                   \
+  __macro(PADDLE_DGETRI)
+// Expand PADDLE_* to the real symbol before the wrapper applies # and ## to it.
+#define DYNAMIC_LOAD_LAPACK_WRAP_EXPAND(__name) DYNAMIC_LOAD_LAPACK_WRAP(__name)
+// clang-format on
+LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP_EXPAND)

-// clang-format on
 }  // namespace dynload

 namespace paddle {
@ -130,16 +133,7 @@ int getrf<float>(const CBLAS_ORDER order,
                 float* A,
                 const int lda,
                 int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return dynload::clapack_sgetrf(order, M, N, A, lda, ipiv);
-#else
-  return dynload::LAPACKE_sgetrf(order, M, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
-  return 0;
+  return dynload::PADDLE_SGETRF(order, M, N, A, lda, ipiv);
 }

 template <>
@ -149,16 +143,7 @@ int getrf<double>(const CBLAS_ORDER order,
                  double* A,
                  const int lda,
                  int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return dynload::clapack_dgetrf(order, M, N, A, lda, ipiv);
-#else
-  return dynload::LAPACKE_dgetrf(order, M, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
-  return 0;
+  return dynload::PADDLE_DGETRF(order, M, N, A, lda, ipiv);
 }

 template <>
@ -167,16 +152,7 @@ int getri<float>(const CBLAS_ORDER order,
                 float* A,
                 const int lda,
                 const int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return dynload::clapack_sgetri(order, N, A, lda, ipiv);
-#else
-  return dynload::LAPACKE_sgetri(order, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
-  return 0;
+  return dynload::PADDLE_SGETRI(order, N, A, lda, ipiv);
 }

 template <>
@ -185,15 +161,6 @@ int getri<double>(const CBLAS_ORDER order,
                  double* A,
                  const int lda,
                  const int* ipiv) {
-#ifdef PADDLE_USE_LAPACK
-#ifdef PADDLE_USE_ATLAS
-  return dynload::clapack_dgetri(order, N, A, lda, ipiv);
-#else
-  return dynload::LAPACKE_dgetri(order, N, A, lda, ipiv);
-#endif
-#else
-  LOG(FATAL) << "Not implemented";
-#endif
-  return 0;
+  return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv);
 }

--- a/Show More
+++ b/Show More