Merge pull request #13754 from luotao1/fast_math

disable EIGEN_FAST_MATH and use_fast_math
7 years ago · 75bd0f188b
parent 34b81f606f 28889caea5
commit 75bd0f188b
4 changed files with 16 additions and 1 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,6 +72,7 @@ option(WITH_INFERENCE    "Compile fluid inference library"              ON)
 option(WITH_INFERENCE_API_TEST   "Test fluid inference high-level api interface"  OFF)
 option(WITH_SYSTEM_BLAS   "Use system blas library"           OFF)
 option(PY_VERSION       "Compile PaddlePaddle with python3 support"     ${PY_VERSION})
+option(WITH_FAST_MATH   "Make use of fast math library"                 OFF)
 # PY_VERSION
 if(NOT PY_VERSION)
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -175,7 +175,10 @@ list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
 list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
 endif(NOT WIN32)
-list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
+if(WITH_FAST_MATH)
+  # Make use of fast math library. https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html
+  list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
+endif()
 # in cuda9, suppress cuda warning on eigen 
 list(APPEND CUDA_NVCC_FLAGS "-w")
 # Set :expt-relaxed-constexpr to suppress Eigen warnings
--- a/cmake/external/eigen.cmake
+++ b/cmake/external/eigen.cmake
@@ -3,6 +3,14 @@ INCLUDE(ExternalProject)
 SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3)
 SET(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}/src/extern_eigen3)
 INCLUDE_DIRECTORIES(${EIGEN_INCLUDE_DIR})
+if(NOT WITH_FAST_MATH)
+  # EIGEN_FAST_MATH: https://eigen.tuxfamily.org/dox/TopicPreprocessorDirectives.html
+  # enables some optimizations which might affect the accuracy of the result. 
+  # This currently enables the SSE vectorization of sin() and cos(), 
+  # and speedups sqrt() for single precision.
+  # Defined to 1 by default. Define it to 0 to disable.
+  add_definitions(-DEIGEN_FAST_MATH=0)
+endif()
 if(WITH_AMD_GPU)
    ExternalProject_Add(
--- a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
@@ -27,6 +27,9 @@ void SetConfig(AnalysisConfig *cfg) {
  cfg->device = 0;
  cfg->enable_ir_optim = true;
  cfg->specify_input_name = true;
+#ifdef PADDLE_WITH_MKLDNN
+  cfg->_use_mkldnn = true;
+#endif
 }
 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {