|
|
@ -39,6 +39,8 @@ typedef double2 vecType;
|
|
|
|
#elif defined(__SSE3__)
|
|
|
|
#elif defined(__SSE3__)
|
|
|
|
#include "hl_cpu_simd_sse.cuh"
|
|
|
|
#include "hl_cpu_simd_sse.cuh"
|
|
|
|
#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && !defined(__NVCC__)
|
|
|
|
#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && !defined(__NVCC__)
|
|
|
|
|
|
|
|
// nvcc does not currently support NEON intrinsics.
|
|
|
|
|
|
|
|
// TODO: Extract the SIMD intrinsic implementations from the .cu files.
|
|
|
|
#include "hl_cpu_simd_neon.cuh"
|
|
|
|
#include "hl_cpu_simd_neon.cuh"
|
|
|
|
#else
|
|
|
|
#else
|
|
|
|
#include "hl_cpu_scalar.cuh"
|
|
|
|
#include "hl_cpu_scalar.cuh"
|
|
|
|