@ -74,7 +74,7 @@ endfunction()
# select_nvcc_arch_flags ( out_variable )
# select_nvcc_arch_flags ( out_variable )
function ( select_nvcc_arch_flags out_variable )
function ( select_nvcc_arch_flags out_variable )
# List of arch names
# List of arch names
set ( archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "A mpere" "A ll" "Manual" )
set ( archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "A ll" "Manual" )
set ( archs_name_default "Auto" )
set ( archs_name_default "Auto" )
list ( APPEND archs_names "Auto" )
list ( APPEND archs_names "Auto" )
@ -91,7 +91,7 @@ function(select_nvcc_arch_flags out_variable)
if ( ${ CUDA_ARCH_NAME } STREQUAL "Manual" )
if ( ${ CUDA_ARCH_NAME } STREQUAL "Manual" )
set ( CUDA_ARCH_BIN ${ paddle_known_gpu_archs } CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported" )
set ( CUDA_ARCH_BIN ${ paddle_known_gpu_archs } CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported" )
set ( CUDA_ARCH_PTX " " CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for" )
set ( CUDA_ARCH_PTX " 50 " CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for" )
mark_as_advanced ( CUDA_ARCH_BIN CUDA_ARCH_PTX )
mark_as_advanced ( CUDA_ARCH_BIN CUDA_ARCH_PTX )
else ( )
else ( )
unset ( CUDA_ARCH_BIN CACHE )
unset ( CUDA_ARCH_BIN CACHE )
@ -108,8 +108,6 @@ function(select_nvcc_arch_flags out_variable)
set ( cuda_arch_bin "70" )
set ( cuda_arch_bin "70" )
elseif ( ${ CUDA_ARCH_NAME } STREQUAL "Turing" )
elseif ( ${ CUDA_ARCH_NAME } STREQUAL "Turing" )
set ( cuda_arch_bin "75" )
set ( cuda_arch_bin "75" )
elseif ( ${ CUDA_ARCH_NAME } STREQUAL "Ampere" )
set ( cuda_arch_bin "80" )
elseif ( ${ CUDA_ARCH_NAME } STREQUAL "All" )
elseif ( ${ CUDA_ARCH_NAME } STREQUAL "All" )
set ( cuda_arch_bin ${ paddle_known_gpu_archs } )
set ( cuda_arch_bin ${ paddle_known_gpu_archs } )
elseif ( ${ CUDA_ARCH_NAME } STREQUAL "Auto" )
elseif ( ${ CUDA_ARCH_NAME } STREQUAL "Auto" )
@ -177,22 +175,14 @@ elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0) # CUDA 9.x
set ( paddle_known_gpu_archs ${ paddle_known_gpu_archs9 } )
set ( paddle_known_gpu_archs ${ paddle_known_gpu_archs9 } )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets" )
elseif ( ${ CMAKE_CUDA_COMPILER_VERSION } LESS 11.0 ) # C U D A 1 0 . x
elseif ( ${ CMAKE_CUDA_COMPILER_VERSION } LESS 11.0 ) # C U D A 1 0 . x
set ( paddle_known_gpu_archs ${ paddle_known_gpu_archs10 } )
set ( paddle_known_gpu_archs ${ paddle_known_gpu_archs10 } )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets" )
elseif ( ${ CMAKE_CUDA_COMPILER_VERSION } LESS 12.0 ) # C U D A 1 1 . x
elseif ( ${ CMAKE_CUDA_COMPILER_VERSION } LESS 11.2 ) # C U D A 1 1 . 0 / 1 1 . 1
set ( paddle_known_gpu_archs ${ paddle_known_gpu_archs11 } )
set ( paddle_known_gpu_archs ${ paddle_known_gpu_archs11 } )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets" )
elseif ( ${ CMAKE_CUDA_COMPILER_VERSION } LESS 12.0 ) # C U D A 1 1 . 2 +
set ( paddle_known_gpu_archs "${paddle_known_gpu_archs11} 86" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets" )
endif ( )
endif ( )
if ( NOT ${ CMAKE_CUDA_COMPILER_VERSION } LESS 10.0 )
if ( NOT ${ CMAKE_CUDA_COMPILER_VERSION } LESS 10.0 )
@ -208,11 +198,14 @@ select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}" )
set ( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}" )
message ( STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}" )
message ( STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}" )
# Set C++14 support
# Set C++11 support
set ( CUDA_PROPAGATE_HOST_FLAGS OFF )
set ( CUDA_PROPAGATE_HOST_FLAGS OFF )
# Release/Debug flags are set by cmake, such as -O3 -g -DNDEBUG etc.
# Release/Debug flags are set by cmake, such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.
# So, don't set these flags here.
set ( CMAKE_CUDA_STANDARD 14 )
if ( NOT WIN32 ) # w i n d o w s m s v c 2 0 1 5 s u p p o r t c + + 1 1 n a t i v e l y .
# -std=c++11 -fPIC is not recognized by msvc; -Xcompiler will be added by cmake.
set ( CMAKE_CUDA_STANDARD 11 )
endif ( NOT WIN32 )
# (Note) On Windows, if /W[1-4] is deleted, /W1 will be added by default and conflict with -w.
# (Note) On Windows, if /W[1-4] is deleted, /W1 will be added by default and conflict with -w.
# So replace /W[1-4] with /W0.
# So replace /W[1-4] with /W0.