[ROCM] update cmake and dockerfile, test=develop (#30598)

revert-31068-fix_conv3d_windows
Qi Li 4 years ago committed by GitHub
parent cf9bdb9404
commit 1f5841c2a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -148,8 +148,9 @@ option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF) option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(ON_INFER "Turn on inference optimization and inference-lib generation" OFF) option(ON_INFER "Turn on inference optimization and inference-lib generation" OFF)
################################ Internal Configurations ####################################### ################################ Internal Configurations #######################################
option(WITH_ROCM_PLATFORM "Compile PaddlePaddle with ROCM platform" OFF) option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF) option(WITH_RCCL "Compile PaddlePaddle with RCCL support" OFF)
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF) option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(WITH_INCREMENTAL_COVERAGE "Generate coverage reports only for incremental code" OFF) option(WITH_INCREMENTAL_COVERAGE "Generate coverage reports only for incremental code" OFF)
@ -278,19 +279,25 @@ include(configure) # add paddle env configuration
include_directories("${PADDLE_SOURCE_DIR}") include_directories("${PADDLE_SOURCE_DIR}")
if(NOT DEFINED ENV{ROCM_PATH}) if(WITH_ROCM)
set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed") include(hip)
set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed") endif(WITH_ROCM)
else()
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed") if (NOT WITH_ROCM AND WITH_RCCL)
set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed") MESSAGE(WARNING
"Disable RCCL when compiling without GPU. Force WITH_RCCL=OFF.")
set(WITH_NCCL OFF CACHE STRING
"Disable RCCL when compiling without GPU" FORCE)
endif() endif()
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
if(WITH_ROCM_PLATFORM) if(WITH_RCCL)
find_package(HIP) add_definitions("-DPADDLE_WITH_RCCL")
include(hip) include(rccl)
endif(WITH_ROCM_PLATFORM) else()
if(WITH_ROCM)
MESSAGE(WARNING "If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used.")
endif()
endif()
if(WITH_NV_JETSON) if(WITH_NV_JETSON)
set(WITH_ARM ON CACHE STRING "Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON." FORCE) set(WITH_ARM ON CACHE STRING "Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON." FORCE)

@ -130,14 +130,10 @@ if(WITH_GPU)
endif() endif()
include_directories(${TENSORRT_INCLUDE_DIR}) include_directories(${TENSORRT_INCLUDE_DIR})
endif() endif()
elseif(WITH_ROCM_PLATFORM) elseif(WITH_ROCM)
add_definitions(-DPADDLE_WITH_HIP) add_definitions(-DPADDLE_WITH_HIP)
add_definitions(-DEIGEN_USE_GPU)
add_definitions(-DEIGEN_USE_HIP) add_definitions(-DEIGEN_USE_HIP)
add_definitions(-D__HIP_PLATFORM_HCC__)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP")
set(THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP)
else() else()
add_definitions(-DHPPL_STUB_FUNC) add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu) list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)

@ -155,7 +155,7 @@ set(COMMON_FLAGS
) )
if(NOT APPLE) if(NOT APPLE)
if((${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.0) OR (WITH_ROCM_PLATFORM AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 7.3)) if((${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.0) OR (WITH_ROCM))
set(COMMON_FLAGS set(COMMON_FLAGS
${COMMON_FLAGS} ${COMMON_FLAGS}
-Wno-format-truncation # Warning in boost gcc 8.2 -Wno-format-truncation # Warning in boost gcc 8.2
@ -213,5 +213,17 @@ foreach(flag ${GPU_COMMON_FLAGS})
safe_set_nvflag(${flag}) safe_set_nvflag(${flag})
endforeach() endforeach()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${SAFE_GPU_COMMON_FLAGS}") if(WITH_GPU)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${SAFE_GPU_COMMON_FLAGS}")
endif()
if(WITH_ROCM)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} ${SAFE_GPU_COMMON_FLAGS}")
endif()
# Disable -Werror, otherwise the compile will fail for rocblas_gemm_ex
if(WITH_ROCM)
string (REPLACE "-Werror" "-Wno-error" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string (REPLACE "-Werror" "-Wno-error" CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
endif()

@ -382,6 +382,9 @@ function(cc_binary TARGET_NAME)
endif() endif()
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${os_dependency_modules}) target_link_libraries(${TARGET_NAME} ${os_dependency_modules})
if(WITH_ROCM)
target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB})
endif()
check_coverage_opt(${TARGET_NAME} ${cc_binary_SRCS}) check_coverage_opt(${TARGET_NAME} ${cc_binary_SRCS})
@ -403,6 +406,9 @@ function(cc_test_build TARGET_NAME)
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${os_dependency_modules} paddle_gtest_main lod_tensor memory gtest gflags glog) target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${os_dependency_modules} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
if(WITH_ROCM)
target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB})
endif()
endif() endif()
check_coverage_opt(${TARGET_NAME} ${cc_test_SRCS}) check_coverage_opt(${TARGET_NAME} ${cc_test_SRCS})
@ -538,33 +544,24 @@ function(nv_test TARGET_NAME)
endfunction(nv_test) endfunction(nv_test)
function(hip_library TARGET_NAME) function(hip_library TARGET_NAME)
if (WITH_ROCM_PLATFORM) if (WITH_ROCM)
set(options STATIC static SHARED shared) set(options STATIC static SHARED shared)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(_sources ${hip_library_SRCS})
set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
if(hip_library_SRCS) if(hip_library_SRCS)
# FindHIP.cmake defined hip_add_library, HIP_SOURCE_PROPERTY_FORMAT is required if no .cu files found
if(NOT ${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/operators")
set_source_files_properties(${hip_library_SRCS} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
endif()
if (hip_library_SHARED OR hip_library_shared) # build *.so if (hip_library_SHARED OR hip_library_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${_cmake_options} ${_generated_files} ${_sources}) hip_add_library(${TARGET_NAME} SHARED ${hip_library_SRCS})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
else() else()
add_library(${TARGET_NAME} STATIC ${_cmake_options} ${_generated_files} ${_sources}) hip_add_library(${TARGET_NAME} STATIC ${hip_library_SRCS})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE CXX)
target_link_libraries(${TARGET_NAME} ${ROCM_PATH}/hip/lib/libhip_hcc.so)
find_fluid_modules(${TARGET_NAME}) find_fluid_modules(${TARGET_NAME})
endif() endif()
if("${hip_library_DEPS}" MATCHES "ARCHIVE_START") if (hip_library_DEPS)
# Support linking flags: --whole-archive (Linux) / -force_load (MacOS). add_dependencies(${TARGET_NAME} ${hip_library_DEPS})
# WARNING: Please don't use ARCHIVE_START&ARCHIVE_END if TARGET_NAME will be linked by other libraries.
target_circle_link_libraries(${TARGET_NAME} ${hip_library_DEPS})
list(REMOVE_ITEM hip_library_DEPS ARCHIVE_START ARCHIVE_END)
else()
target_link_libraries(${TARGET_NAME} ${hip_library_DEPS}) target_link_libraries(${TARGET_NAME} ${hip_library_DEPS})
endif() endif()
# cpplint code style # cpplint code style
@ -574,72 +571,27 @@ function(hip_library TARGET_NAME)
list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif() endif()
endforeach() endforeach()
check_coverage_opt(${TARGET_NAME} ${hip_library_SRCS})
else(hip_library_SRCS) else(hip_library_SRCS)
if (hip_library_DEPS) if (hip_library_DEPS)
merge_static_libs(${TARGET_NAME} ${hip_library_DEPS}) list(REMOVE_DUPLICATES hip_library_DEPS)
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:hip_library")
target_link_libraries(${TARGET_NAME} ${hip_library_DEPS})
add_dependencies(${TARGET_NAME} ${hip_library_DEPS})
else() else()
message(FATAL "Please specify source file or library in nv_library.") message(FATAL "Please specify source file or library in hip_library.")
endif() endif()
endif(hip_library_SRCS) endif(hip_library_SRCS)
endif() endif()
endfunction(hip_library) endfunction(hip_library)
function(hip_library_ops TARGET_NAME)
if (WITH_ROCM_PLATFORM)
set(options STATIC static SHARED shared)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(hip_library_ops "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(_sources ${hip_library_ops_SRCS})
HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
if(hip_library_ops_SRCS)
if (hip_library_ops_SHARED OR hip_library_ops_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
else()
add_library(${TARGET_NAME} STATIC ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE CXX)
target_link_libraries(${TARGET_NAME} ${ROCM_PATH}/hip/lib/libhip_hcc.so)
find_fluid_modules(${TARGET_NAME})
endif()
if("${hip_library_ops_DEPS}" MATCHES "ARCHIVE_START")
# Support linking flags: --whole-archive (Linux) / -force_load (MacOS).
# WARNING: Please don't use ARCHIVE_START&ARCHIVE_END if TARGET_NAME will be linked by other libraries.
target_circle_link_libraries(${TARGET_NAME} ${hip_library_ops_DEPS})
list(REMOVE_ITEM hip_library_ops_DEPS ARCHIVE_START ARCHIVE_END)
else()
target_link_libraries(${TARGET_NAME} ${hip_library_ops_DEPS})
endif()
# cpplint code style
foreach(source_file ${hip_library_ops_SRCS})
string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
list(APPEND hip_library_ops_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
endforeach()
else(hip_library_ops_SRCS)
if (hip_library_ops_DEPS)
merge_static_libs(${TARGET_NAME} ${hip_library_ops_DEPS})
else()
message(FATAL "Please specify source file or library in nv_library.")
endif()
endif(hip_library_ops_SRCS)
endif()
endfunction(hip_library_ops)
function(hip_binary TARGET_NAME) function(hip_binary TARGET_NAME)
if (WITH_ROCM_PLATFORM) if (WITH_ROCM)
set(options "") set(options "")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(hip_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(hip_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) # FindHIP.cmake defined hip_add_executable, HIP_SOURCE_PROPERTY_FORMAT is required for .cc files
hip_add_executable(${TARGET_NAME} ${hip_binary_SRCS}) hip_add_executable(${TARGET_NAME} ${hip_binary_SRCS})
if(hip_binary_DEPS) if(hip_binary_DEPS)
target_link_libraries(${TARGET_NAME} ${hip_binary_DEPS}) target_link_libraries(${TARGET_NAME} ${hip_binary_DEPS})
@ -647,34 +599,29 @@ function(hip_binary TARGET_NAME)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
endif() endif()
endif() endif()
check_coverage_opt(${TARGET_NAME} ${hip_binary_SRCS})
endfunction(hip_binary) endfunction(hip_binary)
function(hip_test TARGET_NAME) function(hip_test TARGET_NAME)
if (WITH_ROCM_PLATFORM AND WITH_TESTING) # The environment variable `CI_SKIP_CPP_TEST` is used to skip the compilation
set(options "") # and execution of test in CI. `CI_SKIP_CPP_TEST` is set to ON when no files
# other than *.py are modified.
if (WITH_ROCM AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(hip_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(hip_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(_sources ${hip_test_SRCS}) # FindHIP.cmake defined hip_add_executable, HIP_SOURCE_PROPERTY_FORMAT is required for .cc files
set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) hip_add_executable(${TARGET_NAME} ${hip_test_SRCS})
HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) # "-pthread -ldl -lrt" is defined in CMAKE_CXX_LINK_EXECUTABLE
if(_source_files) target_link_options(${TARGET_NAME} PRIVATE -pthread -ldl -lrt)
list(REMOVE_ITEM _sources ${_source_files})
endif()
add_executable(${TARGET_NAME} ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags ${os_dependency_modules}) target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog ${os_dependency_modules})
add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags) add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
endif() endif()
check_coverage_opt(${TARGET_NAME} ${hip_test_SRCS})
endfunction(hip_test) endfunction(hip_test)
function(go_library TARGET_NAME) function(go_library TARGET_NAME)

@ -1,104 +1,86 @@
if(NOT WITH_ROCM_PLATFORM) if(NOT WITH_ROCM)
return() return()
endif() endif()
include_directories("${ROCM_PATH}/include") if(NOT DEFINED ENV{ROCM_PATH})
include_directories("${ROCM_PATH}/hip/include") set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed")
include_directories("${ROCM_PATH}/miopen/include") set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed")
include_directories("${ROCM_PATH}/hipblas/include") set(HIP_CLANG_PATH ${ROCM_PATH}/llvm/bin CACHE PATH "Path to which clang has been installed")
include_directories("${ROCM_PATH}/rocblas/include") else()
include_directories("${ROCM_PATH}/hiprand/include") set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed")
include_directories("${ROCM_PATH}/rocrand/include") set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed")
include_directories("${ROCM_PATH}/rccl/include") set(HIP_CLANG_PATH ${ROCM_PATH}/llvm/bin CACHE PATH "Path to which clang has been installed")
include_directories("${ROCM_PATH}/rocthrust/include/")
include_directories("${ROCM_PATH}/hipcub/include/")
include_directories("${ROCM_PATH}/rocprim/include/")
include_directories("${ROCM_PATH}/hipsparse/include/")
include_directories("${ROCM_PATH}/rocsparse/include/")
include_directories("${ROCM_PATH}/rocfft/include/")
set(HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS "")
set(HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS "")
# now default is clang
set(HIP_COMPILER "clang")
list(APPEND EXTERNAL_LIBS "-L${ROCM_PATH}/lib/ -lhip_hcc")
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -fPIC -DPADDLE_WITH_HIP -DEIGEN_USE_HIP -DEIGEN_USE_GPU -D__HIP_NO_HALF_CONVERSIONS__ -std=c++11 --amdgpu-target=gfx906" )
if(WITH_RCCL)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_WITH_RCCL")
endif() endif()
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
if(NOT WITH_PYTHON)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_NO_PYTHON") find_package(HIP REQUIRED)
endif(NOT WITH_PYTHON) include_directories(${ROCM_PATH}/include)
message(STATUS "HIP version: ${HIP_VERSION}")
if(WITH_DSO) message(STATUS "HIP_CLANG_PATH: ${HIP_CLANG_PATH}")
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_USE_DSO")
endif(WITH_DSO) macro(find_package_and_include PACKAGE_NAME)
find_package("${PACKAGE_NAME}" REQUIRED)
if(WITH_TESTING) include_directories("${ROCM_PATH}/${PACKAGE_NAME}/include")
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_WITH_TESTING") message(STATUS "${PACKAGE_NAME} version: ${${PACKAGE_NAME}_VERSION}")
endif(WITH_TESTING) endmacro()
if(WITH_DISTRIBUTE) find_package_and_include(miopen)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_WITH_DISTRIBUTE") find_package_and_include(rocblas)
endif(WITH_DISTRIBUTE) find_package_and_include(hiprand)
find_package_and_include(rocrand)
if(WITH_GRPC) find_package_and_include(rccl)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_WITH_GRPC") find_package_and_include(rocthrust)
endif(WITH_GRPC) find_package_and_include(hipcub)
find_package_and_include(rocprim)
if(WITH_MKLDNN) find_package_and_include(hipsparse)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_WITH_MKLDNN") find_package_and_include(rocsparse)
endif(WITH_MKLDNN) find_package_and_include(rocfft)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DANY_IMPL_ANY_CAST_MOVEABLE") # set CXX flags for HIP
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")
if(CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__")
list(APPEND HIP_HIPCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP")
elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") set(THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP)
list(APPEND HIP_HIPCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO})
elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") # define HIP_CXX_FLAGS
list(APPEND HIP_HIPCC_FLAGS ${CMAKE_CXX_FLAGS_MINSIZEREL}) list(APPEND HIP_CXX_FLAGS -fPIC)
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1)
list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1)
list(APPEND HIP_CXX_FLAGS -Wno-macro-redefined)
list(APPEND HIP_CXX_FLAGS -Wno-inconsistent-missing-override)
list(APPEND HIP_CXX_FLAGS -Wno-exceptions)
list(APPEND HIP_CXX_FLAGS -Wno-shift-count-negative)
list(APPEND HIP_CXX_FLAGS -Wno-shift-count-overflow)
list(APPEND HIP_CXX_FLAGS -Wno-unused-command-line-argument)
list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
list(APPEND HIP_CXX_FLAGS -Wno-implicit-int-float-conversion)
list(APPEND HIP_CXX_FLAGS -Wno-pass-failed)
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
list(APPEND HIP_CXX_FLAGS -std=c++14)
if(CMAKE_BUILD_TYPE MATCHES Debug)
list(APPEND HIP_CXX_FLAGS -g2)
list(APPEND HIP_CXX_FLAGS -O0)
list(APPEND HIP_HIPCC_FLAGS -fdebug-info-for-profiling)
endif(CMAKE_BUILD_TYPE MATCHES Debug)
set(HIP_HCC_FLAGS ${HIP_CXX_FLAGS})
set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS})
# Ask hcc to generate device code during compilation so we can use
# host linker to link.
list(APPEND HIP_HCC_FLAGS -fno-gpu-rdc)
list(APPEND HIP_HCC_FLAGS --amdgpu-target=gfx906)
list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc)
list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx906)
if(HIP_COMPILER STREQUAL clang)
set(hip_library_name amdhip64)
else()
set(hip_library_name hip_hcc)
endif() endif()
message(STATUS "HIP library name: ${hip_library_name}")
if("${HIP_COMPILER}" STREQUAL "hcc") # set HIP link libs
if("x${HCC_HOME}" STREQUAL "x") find_library(ROCM_HIPRTC_LIB ${hip_library_name} HINTS ${HIP_PATH}/lib)
set(HCC_HOME "${ROCM_PATH}/hcc") message(STATUS "ROCM_HIPRTC_LIB: ${ROCM_HIPRTC_LIB}")
endif()
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -ldl --amdgpu-target=gfx906 ")
set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -shared --amdgpu-target=gfx906")
set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -shared --amdgpu-target=gfx906")
elseif("${HIP_COMPILER}" STREQUAL "clang")
if("x${HIP_CLANG_PATH}" STREQUAL "x")
set(HIP_CLANG_PATH "${ROCM_PATH}/llvm/bin")
endif()
#Number of parallel jobs by default is 1
if(NOT DEFINED HIP_CLANG_NUM_PARALLEL_JOBS)
set(HIP_CLANG_NUM_PARALLEL_JOBS 1)
endif()
#Add support for parallel build and link
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
check_cxx_compiler_flag("-parallel-jobs=1" HIP_CLANG_SUPPORTS_PARALLEL_JOBS)
endif()
if(HIP_CLANG_NUM_PARALLEL_JOBS GREATER 1)
if(${HIP_CLANG_SUPPORTS_PARALLEL_JOBS})
set(HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS "-parallel-jobs=${HIP_CLANG_NUM_PARALLEL_JOBS} -Wno-format-nonliteral")
set(HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS "-parallel-jobs=${HIP_CLANG_NUM_PARALLEL_JOBS}")
else()
message("clang compiler doesn't support parallel jobs")
endif()
endif()
# Set the CMake Flags to use the HIP-Clang Compiler.
set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES> --amdgpu-target=gfx906")
set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <LINK_LIBRARIES> -shared --amdgpu-target=gfx906" )
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -ldl --amdgpu-target=gfx906")
endif()

@ -7,13 +7,16 @@ function(op_library TARGET)
# for ops. # for ops.
set(cc_srcs) set(cc_srcs)
set(cu_srcs) set(cu_srcs)
set(hip_cu_srcs) set(hip_srcs)
set(miopen_hip_cc_srcs)
set(cu_cc_srcs) set(cu_cc_srcs)
set(hip_cc_srcs)
set(xpu_cc_srcs) set(xpu_cc_srcs)
set(cudnn_cu_cc_srcs) set(cudnn_cu_cc_srcs)
set(miopen_cu_cc_srcs)
set(cudnn_cu_srcs) set(cudnn_cu_srcs)
set(miopen_cu_srcs)
set(CUDNN_FILE) set(CUDNN_FILE)
set(MIOPEN_FILE)
set(mkldnn_cc_srcs) set(mkldnn_cc_srcs)
set(MKLDNN_FILE) set(MKLDNN_FILE)
set(op_common_deps operator op_registry math_function layer common_infer_shape_functions) set(op_common_deps operator op_registry math_function layer common_infer_shape_functions)
@ -30,46 +33,44 @@ function(op_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
list(APPEND cc_srcs ${TARGET}.cc) list(APPEND cc_srcs ${TARGET}.cc)
endif() endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc) if(WITH_GPU)
list(APPEND cu_cc_srcs ${TARGET}.cu.cc) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
endif() list(APPEND cu_cc_srcs ${TARGET}.cu.cc)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) endif()
list(APPEND cu_srcs ${TARGET}.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
endif() list(APPEND cu_srcs ${TARGET}.cu)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) endif()
set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
${PART_CUDA_KERNEL_FILES} PARENT_SCOPE) set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) ${PART_CUDA_KERNEL_FILES} PARENT_SCOPE)
endif() list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu) string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}")
list(APPEND hip_cu_srcs ${TARGET}.hip.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc)
endif() list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc)
string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}") endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu)
list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc) list(APPEND cudnn_cu_srcs ${CUDNN_FILE}.cu)
endif() endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu)
list(APPEND cudnn_cu_srcs ${CUDNN_FILE}.cu)
endif() endif()
if(WITH_ROCM_PLATFORM) if(WITH_ROCM)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu.cc) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
list(APPEND hip_cu_cc_srcs ${TARGET}.hip.cu.cc) list(APPEND hip_cc_srcs ${TARGET}.cu.cc)
endif() endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
list(APPEND hip_cu_srcs ${TARGET}.hip.cu) list(APPEND hip_srcs ${TARGET}.cu)
endif() endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.hip.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.hip.cu set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
${PART_CUDA_KERNEL_FILES} PARENT_SCOPE) ${PART_CUDA_KERNEL_FILES} PARENT_SCOPE)
list(APPEND hip_cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.hip.cu) list(APPEND hip_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
endif() endif()
string(REPLACE "_op" "_miopen_op" MIOPEN_FILE "${TARGET}") string(REPLACE "_op" "_cudnn_op" MIOPEN_FILE "${TARGET}")
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.hip.cu.cc) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu.cc)
list(APPEND miopen_hip_cu_cc_srcs ${MIOPEN_FILE}.hip.cu.cc) list(APPEND miopen_cu_cc_srcs ${MIOPEN_FILE}.cu.cc)
endif() endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.hip.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu)
list(APPEND miopen_hip_cu_srcs ${MIOPEN_FILE}.hip.cu) list(APPEND miopen_cu_srcs ${MIOPEN_FILE}.cu)
endif() endif()
endif() endif()
if(WITH_MKLDNN) if(WITH_MKLDNN)
@ -86,20 +87,20 @@ function(op_library TARGET)
endif() endif()
else() else()
foreach(src ${op_library_SRCS}) foreach(src ${op_library_SRCS})
if (WITH_ROCM_PLATFORM AND ${src} MATCHES ".*\\.hip.cu$") if(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu$")
list(APPEND hip_cu_srcs ${src}) list(APPEND miopen_cu_srcs ${src})
elseif(WITH_ROCM_PLATFORM AND ${src} MATCHES ".*\\.hip.cu.cc$") elseif(WITH_ROCM AND ${src} MATCHES ".*\\.cu$")
list(APPEND hip_cu_cc_srcs ${src}) list(APPEND hip_srcs ${src})
elseif(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu.cc$")
list(APPEND miopen_cu_cc_srcs ${src})
elseif(WITH_ROCM AND ${src} MATCHES ".*\\.cu.cc$")
list(APPEND hip_cc_srcs ${src})
elseif(${src} MATCHES ".*_cudnn_op.cu$") elseif(${src} MATCHES ".*_cudnn_op.cu$")
list(APPEND cudnn_cu_srcs ${src}) list(APPEND cudnn_cu_srcs ${src})
elseif (${src} MATCHES ".*\\.cu$") elseif (${src} MATCHES ".*\\.cu$")
list(APPEND cu_srcs ${src}) list(APPEND cu_srcs ${src})
elseif(${src} MATCHES ".*_cudnn_op.cu.cc$") elseif(${src} MATCHES ".*_cudnn_op.cu.cc$")
list(APPEND cudnn_cu_cc_srcs ${src}) list(APPEND cudnn_cu_cc_srcs ${src})
elseif(WITH_ROCM_PLATFORM AND ${src} MATCHES ".*_miopen_op.hip.cc$")
list(APPEND miopen_hip_cc_srcs ${src})
elseif(WITH_ROCM_PLATFORM AND ${src} MATCHES ".*_miopen_op.hip.cu$")
list(APPEND miopen_hip_cu_srcs ${src})
elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$") elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$")
list(APPEND mkldnn_cc_srcs ${src}) list(APPEND mkldnn_cc_srcs ${src})
elseif(${src} MATCHES ".*\\.cu.cc$") elseif(${src} MATCHES ".*\\.cu.cc$")
@ -163,8 +164,13 @@ function(op_library TARGET)
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS} nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
${op_common_deps}) ${op_common_deps})
endif() endif()
elseif (WITH_ROCM_PLATFORM) elseif (WITH_ROCM)
hip_library_ops(${TARGET} SRCS ${cc_srcs} ${hip_cu_cc_srcs} ${hip_cu_srcs} ${miopen_hip_cu_cc_srcs} ${miopen_hip_cu_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS} list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc")
list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc")
list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
list(REMOVE_ITEM hip_srcs "correlation_op.cu")
list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS}
${op_common_deps}) ${op_common_deps})
else() else()
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`. # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
@ -227,13 +233,14 @@ function(op_library TARGET)
# pybind USE_CPU_ONLY_OP # pybind USE_CPU_ONLY_OP
list(LENGTH cu_srcs cu_srcs_len) list(LENGTH cu_srcs cu_srcs_len)
list(LENGTH hip_srcs hip_srcs_len)
list(LENGTH cu_cc_srcs cu_cc_srcs_len) list(LENGTH cu_cc_srcs cu_cc_srcs_len)
list(LENGTH hip_cc_srcs hip_cc_srcs_len)
list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len) list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
list(LENGTH xpu_cc_srcs xpu_cc_srcs_len) list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
list(LENGTH hip_cu_srcs hip_cu_srcs_len) list(LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len)
list(LENGTH miopen_hip_cc_srcs miopen_hip_cc_srcs_len)
if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND
${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0 AND ${xpu_cc_srcs_len} EQUAL 0) ${hip_srcs_len} EQUAL 0 AND ${hip_cc_srcs_len} EQUAL 0 AND ${miopen_cu_cc_srcs_len} EQUAL 0 AND ${xpu_cc_srcs_len} EQUAL 0)
file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n") file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n")
set(pybind_flag 1) set(pybind_flag 1)
endif() endif()
@ -248,26 +255,26 @@ function(op_library TARGET)
endif() endif()
endif() endif()
# pybind USE_OP_DEVICE_KERNEL for CUDNN
list(LENGTH cudnn_cu_srcs cudnn_cu_srcs_len)
if (WITH_GPU AND ${cudnn_cu_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
endif()
# pybind USE_OP_DEVICE_KERNEL for MIOPEN # pybind USE_OP_DEVICE_KERNEL for MIOPEN
list(LENGTH miopen_hip_cu_cc_srcs miopen_hip_cu_cc_srcs_len) list(LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len)
if (WITH_ROCM_PLATFORM AND ${miopen_hip_cu_cc_srcs_len} GREATER 0) if (WITH_ROCM AND ${miopen_cu_cc_srcs_len} GREATER 0)
if(${TARGET} STREQUAL "activation") if(${TARGET} STREQUAL "activation")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n")
else() else()
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
endif() endif()
endif() endif()
# pybind USE_OP_DEVICE_KERNEL for CUDNN
list(LENGTH cudnn_cu_srcs cudnn_cu_srcs_len)
if (WITH_GPU AND ${cudnn_cu_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
endif()
# pybind USE_OP_DEVICE_KERNEL for MIOPEN # pybind USE_OP_DEVICE_KERNEL for MIOPEN
list(LENGTH miopen_hip_cu_srcs miopen_hip_cu_srcs_len) list(LENGTH miopen_cu_srcs miopen_cu_srcs_len)
if (WITH_ROCM_PLATFORM AND ${miopen_hip_cu_srcs_len} GREATER 0) if (WITH_ROCM AND ${miopen_cu_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
endif() endif()
if (WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0) if (WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0)

@ -0,0 +1,28 @@
# Locates the RCCL header (rccl.h) for ROCm builds and reports its version.
#
# Inputs read:  WITH_ROCM, WITH_RCCL, WIN32, ROCM_PATH, and the RCCL_ROOT
#               cache entry / environment variable.
# Outputs set:  RCCL_ROOT (cache), RCCL_INCLUDE_DIR (cache, via find_path),
#               RCCL_VERSION (value of NCCL_VERSION_CODE parsed from rccl.h).

# Nothing to do unless this is a ROCm build.
if(NOT WITH_ROCM)
  return()
endif()

# Now we don't support RCCL on windows
if(WIN32)
  return()
endif()

if(WITH_RCCL)
  set(RCCL_ROOT ${ROCM_PATH}/rccl CACHE PATH "RCCL ROOT")

  # Only the RCCL_ROOT locations (cache value and environment) are searched;
  # NO_DEFAULT_PATH keeps find_path away from the system default directories.
  find_path(RCCL_INCLUDE_DIR rccl.h
    PATHS ${RCCL_ROOT} ${RCCL_ROOT}/include ${RCCL_ROOT}/local/include
    $ENV{RCCL_ROOT} $ENV{RCCL_ROOT}/include $ENV{RCCL_ROOT}/local/include
    NO_DEFAULT_PATH
  )

  # Fail with an actionable message instead of letting file(READ) abort on
  # the "RCCL_INCLUDE_DIR-NOTFOUND/rccl.h" path.
  if(NOT RCCL_INCLUDE_DIR)
    message(FATAL_ERROR
      "WITH_RCCL is ON but rccl.h was not found under RCCL_ROOT "
      "(${RCCL_ROOT}). Set RCCL_ROOT to the RCCL installation directory "
      "or configure with -DWITH_RCCL=OFF.")
  endif()

  file(READ "${RCCL_INCLUDE_DIR}/rccl.h" RCCL_VERSION_FILE_CONTENTS)

  # First isolate the "define NCCL_VERSION_CODE <n>" text, then reduce it to
  # the number alone. RCCL_VERSION ends up empty if the macro is absent.
  string(REGEX MATCH "define NCCL_VERSION_CODE +([0-9]+)"
    RCCL_VERSION "${RCCL_VERSION_FILE_CONTENTS}")
  string(REGEX REPLACE "define NCCL_VERSION_CODE +([0-9]+)" "\\1"
    RCCL_VERSION "${RCCL_VERSION}")

  # 2604 for ROCM3.5 and 2708 for ROCM 3.9
  message(STATUS "Current RCCL header is ${RCCL_INCLUDE_DIR}/rccl.h. "
    "Current RCCL version is v${RCCL_VERSION}. ")
endif()

@ -6,6 +6,8 @@ set(PY_FILES paddle/__init__.py
if(WITH_GPU) if(WITH_GPU)
SET(PACKAGE_NAME "paddlepaddle-gpu") SET(PACKAGE_NAME "paddlepaddle-gpu")
elseif(WITH_ROCM)
SET(PACKAGE_NAME "paddlepaddle-rocm")
else() else()
SET(PACKAGE_NAME "paddlepaddle") SET(PACKAGE_NAME "paddlepaddle")
endif() endif()

@ -1,4 +1,6 @@
if (WITH_GPU) if(WITH_ROCM)
hip_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
elseif(WITH_GPU)
nv_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared) nv_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
else() else()
cc_library(relu_op_shared SHARED SRCS relu_op.cc DEPS paddle_framework_shared) cc_library(relu_op_shared SHARED SRCS relu_op.cc DEPS paddle_framework_shared)

@ -5,7 +5,7 @@ set(dist_ENVS http_proxy="" https_proxy="")
file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py") file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py")
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_op") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_op")
if(NOT WITH_NCCL) if ((NOT WITH_NCCL) AND (NOT WITH_RCCL))
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl")
endif() endif()
string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}") string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}")
@ -63,7 +63,7 @@ foreach(TEST_OP ${MIXED_DIST_TEST_OPS})
list(REMOVE_ITEM TEST_OPS ${TEST_OP}) list(REMOVE_ITEM TEST_OPS ${TEST_OP})
endforeach() endforeach()
if(NOT WITH_GPU OR WIN32) if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
LIST(REMOVE_ITEM TEST_OPS test_c_comm_init_all_op) LIST(REMOVE_ITEM TEST_OPS test_c_comm_init_all_op)
LIST(REMOVE_ITEM TEST_OPS test_allgather) LIST(REMOVE_ITEM TEST_OPS test_allgather)
LIST(REMOVE_ITEM TEST_OPS test_allreduce) LIST(REMOVE_ITEM TEST_OPS test_allreduce)
@ -146,7 +146,7 @@ if(APPLE OR WIN32)
LIST(REMOVE_ITEM TEST_OPS test_fleet_metric) LIST(REMOVE_ITEM TEST_OPS test_fleet_metric)
endif() endif()
if (NOT ${WITH_GPU}) if ((NOT WITH_GPU) AND (NOT WITH_ROCM))
LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
LIST(REMOVE_ITEM TEST_OPS test_rank_attention_op) # TODO(shenliang03): rank_attention_op support CPU device in future LIST(REMOVE_ITEM TEST_OPS test_rank_attention_op) # TODO(shenliang03): rank_attention_op support CPU device in future
LIST(REMOVE_ITEM TEST_OPS test_batch_fc_op) # TODO(shenliang03): batch_fc_op support CPU device in future LIST(REMOVE_ITEM TEST_OPS test_batch_fc_op) # TODO(shenliang03): batch_fc_op support CPU device in future
@ -159,9 +159,10 @@ if (NOT ${WITH_GPU})
LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sync_batch_norm) LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sync_batch_norm)
LIST(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision) LIST(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision)
LIST(REMOVE_ITEM TEST_OPS test_fleet_base_single) LIST(REMOVE_ITEM TEST_OPS test_fleet_base_single)
elseif(WITH_GPU)
elseif(${CUDNN_VERSION} VERSION_LESS 7100) if (${CUDNN_VERSION} VERSION_LESS 7100)
LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
endif()
endif() endif()
if (WITH_NCCL) if (WITH_NCCL)
@ -172,11 +173,11 @@ if (WITH_NCCL)
endif() endif()
endif() endif()
if(NOT WITH_NCCL) if ((NOT WITH_NCCL) AND (NOT WITH_RCCL))
list(REMOVE_ITEM TEST_OPS test_imperative_group) list(REMOVE_ITEM TEST_OPS test_imperative_group)
endif() endif()
if(NOT WITH_GPU OR WIN32) if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
LIST(REMOVE_ITEM TEST_OPS test_boxps) LIST(REMOVE_ITEM TEST_OPS test_boxps)
endif() endif()
list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290 list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290
@ -213,7 +214,7 @@ endif()
list(REMOVE_ITEM TEST_OPS test_fleet_pyramid_hash) list(REMOVE_ITEM TEST_OPS test_fleet_pyramid_hash)
if(WITH_GPU OR NOT WITH_MKLML) if((WITH_ROCM OR WITH_GPU) OR NOT WITH_MKLML)
# matmul with multiple heads need MKL support # matmul with multiple heads need MKL support
LIST(REMOVE_ITEM TEST_OPS test_matmul_op_with_head) LIST(REMOVE_ITEM TEST_OPS test_matmul_op_with_head)
endif() endif()
@ -510,7 +511,7 @@ if(WITH_DISTRIBUTE)
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_se_resnext_dgc") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_se_resnext_dgc")
endif() endif()
if(NOT APPLE) if(NOT APPLE)
if(WITH_GPU) if(WITH_GPU OR WITH_ROCM)
bash_test_modules(test_c_comm_init_op START_BASH test_c_comm_init_op.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) bash_test_modules(test_c_comm_init_op START_BASH test_c_comm_init_op.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
py_test_modules(test_launch_coverage MODULES test_launch_coverage) py_test_modules(test_launch_coverage MODULES test_launch_coverage)
endif() endif()
@ -667,7 +668,7 @@ if (WITH_DISTRIBUTE)
endif() endif()
if (WITH_DISTRIBUTE AND NOT APPLE) if (WITH_DISTRIBUTE AND NOT APPLE)
if(WITH_GPU) if(WITH_GPU OR WITH_ROCM)
set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120) set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 120) set_tests_properties(test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 120)
endif() endif()
@ -821,7 +822,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL)
set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 120)
endif() endif()
endif() endif()
if(WITH_GPU AND NOT WIN32) if((WITH_ROCM OR WITH_GPU) AND NOT WIN32)
set_tests_properties(test_collective_allgather_api PROPERTIES TIMEOUT 120) set_tests_properties(test_collective_allgather_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_broadcast_api PROPERTIES TIMEOUT 120) set_tests_properties(test_collective_broadcast_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_allreduce_api PROPERTIES TIMEOUT 120) set_tests_properties(test_collective_allreduce_api PROPERTIES TIMEOUT 120)
@ -851,7 +852,7 @@ if(WITH_GPU AND NOT WIN32)
test_collective_allgather_api test_collective_allgather_api
PROPERTIES LABELS "RUN_TYPE=DIST") PROPERTIES LABELS "RUN_TYPE=DIST")
endif() endif()
if(WITH_GPU) if(WITH_GPU OR WITH_ROCM)
set_tests_properties(test_imperative_auto_mixed_precision PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_auto_mixed_precision PROPERTIES TIMEOUT 120)
set_tests_properties(test_parallel_dygraph_sync_batch_norm PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_dygraph_sync_batch_norm PROPERTIES TIMEOUT 120)
set_tests_properties(test_rank_attention_op PROPERTIES TIMEOUT 120) set_tests_properties(test_rank_attention_op PROPERTIES TIMEOUT 120)

@ -1,7 +1,7 @@
file(GLOB TEST_IR_PASSES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") file(GLOB TEST_IR_PASSES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_IR_PASSES "${TEST_IR_PASSES}") string(REPLACE ".py" "" TEST_IR_PASSES "${TEST_IR_PASSES}")
if(NOT WITH_GPU OR WIN32 OR APPLE) if(((NOT WITH_GPU) AND (NOT WITH_ROCM)) OR WIN32 OR APPLE)
LIST(REMOVE_ITEM TEST_IR_PASSES test_ir_fusion_group_pass) LIST(REMOVE_ITEM TEST_IR_PASSES test_ir_fusion_group_pass)
endif() endif()

@ -55,7 +55,7 @@ function test_launch_ps_heter(){
fi fi
} }
if [[ ${WITH_GPU} == "OFF" ]]; then if [[ ${WITH_GPU} == "OFF" && ("${WITH_ROCM}x" == "x" || ${WITH_ROCM} == "OFF") ]]; then
echo "in cpu test mode" echo "in cpu test mode"
test_launch_ps test_launch_ps
exit 0 exit 0

@ -19,7 +19,7 @@ function(py_dist_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS ENVS) set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(WITH_COVERAGE AND WITH_GPU AND WITH_NCCL AND NOT WIN32) if(WITH_COVERAGE AND (WITH_GPU OR WITH_ROCM) AND (WITH_NCCL OR WITH_RCCL) AND NOT WIN32)
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1

@ -401,7 +401,7 @@ headers = (
if '${WITH_MKLDNN}' == 'ON': if '${WITH_MKLDNN}' == 'ON':
headers += list(find_files('*', '${MKLDNN_INSTALL_DIR}/include')) # mkldnn headers += list(find_files('*', '${MKLDNN_INSTALL_DIR}/include')) # mkldnn
if '${WITH_GPU}' == 'ON': if '${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON':
headers += list(find_files('*.pb', '${cudaerror_INCLUDE_DIR}')) # errorMessage.pb for errormessage headers += list(find_files('*.pb', '${cudaerror_INCLUDE_DIR}')) # errorMessage.pb for errormessage
class InstallCommand(InstallCommandBase): class InstallCommand(InstallCommandBase):
@ -462,7 +462,7 @@ class InstallHeaders(Command):
def run(self): def run(self):
# only copy third_party/cudaErrorMessage.pb for cudaErrorMessage on mac or windows # only copy third_party/cudaErrorMessage.pb for cudaErrorMessage on mac or windows
if os.name == 'nt' or sys.platform == 'darwin': if os.name == 'nt' or sys.platform == 'darwin':
if '${WITH_GPU}' == 'ON': if '${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON':
self.mkdir_and_copy_file('${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb') self.mkdir_and_copy_file('${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb')
return return
hdrs = self.distribution.headers hdrs = self.distribution.headers

File diff suppressed because it is too large Load Diff

@ -65,7 +65,7 @@ yum -y install bzip2 make git patch unzip bison yasm diffutils \
wget -q https://cmake.org/files/v3.16/cmake-3.16.0.tar.gz && tar xzf cmake-3.16.0.tar.gz && \ wget -q https://cmake.org/files/v3.16/cmake-3.16.0.tar.gz && tar xzf cmake-3.16.0.tar.gz && \
cd cmake-3.16.0 && ./bootstrap && \ cd cmake-3.16.0 && ./bootstrap && \
make -j8 && make install && cd .. && rm cmake-3.16.0.tar.gz make -j8 && make install && cd .. && rm cmake-3.16.0.tar.gz && rm -rf cmake-3.16.0
# Install newest autoconf # Install newest autoconf
build_autoconf $AUTOCONF_ROOT $AUTOCONF_HASH build_autoconf $AUTOCONF_ROOT $AUTOCONF_HASH
@ -160,3 +160,4 @@ LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}"
wget https://ftp.gnu.org/gnu/binutils/binutils-2.27.tar.gz wget https://ftp.gnu.org/gnu/binutils/binutils-2.27.tar.gz
tar xzf binutils-2.27.tar.gz && cd binutils-2.27 tar xzf binutils-2.27.tar.gz && cd binutils-2.27
./configure --prefix=/opt/rh/devtoolset-2/root/usr/ --enable-64-bit-archive && make -j `nproc` && make install ./configure --prefix=/opt/rh/devtoolset-2/root/usr/ --enable-64-bit-archive && make -j `nproc` && make install
cd .. && rm binutils-2.27.tar.gz && rm -rf binutils-2.27

@ -1,45 +0,0 @@
#!/bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Render the Dockerfile.rocm template into one concrete dockerfile.
#   $1 - output path
#   $2 - value for <rocm_repo_version>
#   $3 - value for <rocprim_version>
#   $4 - value for <rocthrust_version>
#   $5 - value for <hipcub_version>
# All four placeholders are substituted in a single sed pass.
function _render_rocm_dockerfile() {
  local output_file="$1"
  local repo_version="$2"
  local rocprim_version="$3"
  local rocthrust_version="$4"
  local hipcub_version="$5"

  sed -e "s#<rocm_repo_version>#${repo_version}#g" \
      -e "s#<rocprim_version>#${rocprim_version}#g" \
      -e "s#<rocthrust_version>#${rocthrust_version}#g" \
      -e "s#<hipcub_version>#${hipcub_version}#g" \
      Dockerfile.rocm > "${output_file}"
}

# Generate the per-release ROCm dockerfiles under test/ from Dockerfile.rocm
# in the current directory. The test/ directory must already exist.
function rocm() {
  # ROCM 3.3 is intentionally not generated: it does not work because the
  # rocthrust build fails without an AMD GPU.

  # ROCM 3.5
  _render_rocm_dockerfile test/rocm35.dockerfile 3.5.1 3.5.1 3.5.0 3.5.0

  # ROCM 3.9
  _render_rocm_dockerfile test/rocm39.dockerfile 3.9.1 3.9.0 3.9.0 3.9.0
}
# Entry point: ensure the test/ output directory exists, then generate the
# ROCm dockerfiles into it.
function main() {
  # Create the output directory only when it is not already there.
  [ -d "test" ] || mkdir test

  rocm
}

main
Loading…
Cancel
Save