| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -39,6 +39,8 @@ typedef double2 vecType;
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					#elif defined(__SSE3__)
 | 
					 | 
					 | 
					 | 
					#elif defined(__SSE3__)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					#include "hl_cpu_simd_sse.cuh"
 | 
					 | 
					 | 
					 | 
					#include "hl_cpu_simd_sse.cuh"
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && !defined(__NVCC__)
 | 
					 | 
					 | 
					 | 
					#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && !defined(__NVCC__)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					// Currently nvcc does not support neon intrinsic.
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					// TODO: Extract simd intrinsic implementation from .cu files.
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					#include "hl_cpu_simd_neon.cuh"
 | 
					 | 
					 | 
					 | 
					#include "hl_cpu_simd_neon.cuh"
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					#else
 | 
					 | 
					 | 
					 | 
					#else
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					#include "hl_cpu_scalar.cuh"
 | 
					 | 
					 | 
					 | 
					#include "hl_cpu_scalar.cuh"
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
						
					 | 
					 | 
					
 
 |