|
|
|
@ -2,6 +2,7 @@
|
|
|
|
|
# so that PaddlePaddle can unleash the vectorization power of multicore CPUs.
|
|
|
|
|
|
|
|
|
|
INCLUDE(CheckCXXSourceRuns)
|
|
|
|
|
INCLUDE(CheckCXXSourceCompiles)
|
|
|
|
|
|
|
|
|
|
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
|
|
|
|
set(MMX_FLAG "-mmmx")
|
|
|
|
@ -9,12 +10,14 @@ IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID
|
|
|
|
|
set(SSE3_FLAG "-msse3")
|
|
|
|
|
SET(AVX_FLAG "-mavx")
|
|
|
|
|
SET(AVX2_FLAG "-mavx2")
|
|
|
|
|
SET(NEON_FLAG "-mfloat-abi=softfp -mfpu=neon")
|
|
|
|
|
ELSEIF(MSVC)
|
|
|
|
|
set(MMX_FLAG "/arch:MMX")
|
|
|
|
|
set(SSE2_FLAG "/arch:SSE2")
|
|
|
|
|
set(SSE3_FLAG "/arch:SSE3")
|
|
|
|
|
SET(AVX_FLAG "/arch:AVX")
|
|
|
|
|
SET(AVX2_FLAG "/arch:AVX2")
|
|
|
|
|
#SET(NEON_FLAG "")
|
|
|
|
|
ENDIF()
|
|
|
|
|
|
|
|
|
|
# Check MMX
|
|
|
|
@ -73,4 +76,26 @@ int main()
|
|
|
|
|
return 0;
|
|
|
|
|
}" AVX2_FOUND)
|
|
|
|
|
|
|
|
|
|
mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
|
|
|
|
|
# Check NEON
#
# Probe whether the C++ compiler, when given ${NEON_FLAG}, can compile and link
# a small program that includes <arm_neon.h> and calls a NEON intrinsic. The
# program is never executed, so the probe also works when cross-compiling.
# Result: NEON_FOUND (cached by check_cxx_source_compiles).
#
# CMAKE_REQUIRED_FLAGS is read by every subsequent check_*() call, so the value
# that was in effect before this probe is saved and put back afterwards.
# Leaving the NEON options in place would make later, unrelated checks fail on
# compilers that do not understand them.
set(neon_saved_required_flags "${CMAKE_REQUIRED_FLAGS}")
set(CMAKE_REQUIRED_FLAGS "${NEON_FLAG}")
check_cxx_source_compiles("
#include <arm_neon.h>
int main()
{
float32x4_t a = {-1.0f, 2.0f, -3.0f, 4.0f};
float32x4_t b = {1.0f, 2.0f, 3.0f, 4.0f};
float32x4_t c = vaddq_f32(a, b);
return 0;
}" NEON_FOUND)
set(CMAKE_REQUIRED_FLAGS "${neon_saved_required_flags}")
unset(neon_saved_required_flags)
|
|
|
|
|
|
|
|
|
|
# Select the SIMD compiler flag used by the rest of the build:
#   1. NEON, when the NEON probe succeeded;
#   2. otherwise AVX, when the WITH_AVX switch is on;
#   3. otherwise fall back to SSE3.
if(NEON_FOUND)
  set(SIMD_FLAG ${NEON_FLAG})
elseif(WITH_AVX)
  set(SIMD_FLAG ${AVX_FLAG})
else()
  set(SIMD_FLAG ${SSE3_FLAG})
endif()

# The probe results are implementation details; mark them advanced so they
# are hidden from the default cache view in cmake-gui / ccmake.
mark_as_advanced(
  MMX_FOUND
  SSE2_FOUND
  SSE3_FOUND
  AVX_FOUND
  AVX2_FOUND
  NEON_FOUND
)
|
|
|
|
|