Demostration of cmake refine for HIP support.

1. Add option WITH_AMD_GPU.
2. Add cmake/hip.cmake for HIP toolchain.
3. Some external module such as eigen may need HIP port.
4. Add macro hip_library/hip_binary/hip_test to cmake/generic.cmake.
5. Add one HIP source concat.hip.cu as an example. Each .cu may have its corresponding .hip.cu.
helinwang-patch-1
sabreshao 7 years ago
parent 6d4a06f26b
commit 45c988d86a

@ -36,6 +36,7 @@ include(simd)
################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AMD_GPU "Compile PaddlePaddle with AMD GPU" OFF)
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND})
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
@ -69,6 +70,9 @@ if(NOT CMAKE_BUILD_TYPE)
FORCE)
endif()
if(WITH_AMD_GPU)
endif()
if(ANDROID OR IOS)
if(ANDROID)
if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "16")
@ -180,6 +184,11 @@ if(WITH_GPU)
include(cuda)
endif(WITH_GPU)
if(WITH_AMD_GPU)
find_package(HIP)
include(hip)
endif(WITH_AMD_GPU)
if(WITH_MKLML)
list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
endif()

@ -57,11 +57,7 @@ if(NOT WITH_GOLANG)
add_definitions(-DPADDLE_WITHOUT_GOLANG)
endif(NOT WITH_GOLANG)
if(NOT WITH_GPU)
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
if(WITH_GPU)
add_definitions(-DPADDLE_WITH_CUDA)
FIND_PACKAGE(CUDA REQUIRED)
@ -84,7 +80,14 @@ else()
# Include cuda and cudnn
include_directories(${CUDNN_INCLUDE_DIR})
include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU)
elseif(WITH_AMD_GPU)
add_definitions(-DPADDLE_WITH_HIP)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__")
else()
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
endif()
if (WITH_MKLML AND MKLML_IOMP_LIB)
message(STATUS "Enable Intel OpenMP with ${MKLML_IOMP_LIB}")

@ -1,21 +1,36 @@
INCLUDE(ExternalProject)
SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3)
SET(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}/src/extern_eigen3)
INCLUDE_DIRECTORIES(${EIGEN_INCLUDE_DIR})
ExternalProject_Add(
extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/RLovelett/eigen.git"
GIT_TAG 70661066beef694cadf6c304d0d07e0758825c10
PREFIX ${EIGEN_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/extern_eigen3)
if(WITH_AMD_GPU)
ExternalProject_Add(
extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/sabreshao/hipeigen.git"
GIT_TAG 0cba03ff9f8f9f70bbd92ac5857b031aa8fed6f9
PREFIX ${EIGEN_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
else()
ExternalProject_Add(
extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/RLovelett/eigen.git"
GIT_TAG 70661066beef694cadf6c304d0d07e0758825c10
PREFIX ${EIGEN_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
endif()
if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/eigen3_dummy.c)

@ -317,6 +317,82 @@ function(nv_test TARGET_NAME)
endif()
endfunction(nv_test)
function(hip_library TARGET_NAME)
if (WITH_AMD_GPU)
set(options STATIC static SHARED shared)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(_sources ${hip_library_SRCS})
HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
if(hip_library_SRCS)
if (hip_library_SHARED OR hip_library_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
else()
add_library(${TARGET_NAME} STATIC ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE CXX)
target_link_libraries(${TARGET_NAME} /opt/rocm/hip/lib/libhip_hcc.so /opt/rocm/hip/lib/libhip_device.a)
find_fluid_modules(${TARGET_NAME})
endif()
if (hip_library_DEPS)
add_dependencies(${TARGET_NAME} ${hip_library_DEPS})
target_link_libraries(${TARGET_NAME} ${hip_library_DEPS})
endif()
# cpplint code style
foreach(source_file ${hip_library_SRCS})
string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
endforeach()
add_style_check_target(${TARGET_NAME} ${hip_library_SRCS} ${hip_library_HEADERS})
else(hip_library_SRCS)
if (hip_library_DEPS)
merge_static_libs(${TARGET_NAME} ${hip_library_DEPS})
else()
message(FATAL "Please specify source file or library in nv_library.")
endif()
endif(hip_library_SRCS)
endif()
endfunction(hip_library)
function(hip_binary TARGET_NAME)
if (WITH_AMD_GPU)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(hip_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
hip_add_executable(${TARGET_NAME} ${hip_binary_SRCS})
if(hip_binary_DEPS)
target_link_libraries(${TARGET_NAME} ${hip_binary_DEPS})
add_dependencies(${TARGET_NAME} ${hip_binary_DEPS})
endif()
endif()
endfunction(hip_binary)
function(hip_test TARGET_NAME)
if (WITH_AMD_GPU AND WITH_TESTING)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(hip_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(_sources ${hip_test_SRCS})
HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
add_executable(${TARGET_NAME} ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
add_test(${TARGET_NAME} ${TARGET_NAME})
endif()
endfunction(hip_test)
function(go_library TARGET_NAME)
set(options STATIC static SHARED shared)
set(oneValueArgs "")

@ -0,0 +1,46 @@
if(NOT WITH_AMD_GPU)
return()
endif()
include_directories("/opt/rocm/include")
include_directories("/opt/rocm/hipblas/include")
include_directories("/opt/rocm/hiprand/include")
include_directories("/opt/rocm/rocrand/include")
include_directories("/opt/rocm/rccl/include")
include_directories("/opt/rocm/thrust")
list(APPEND EXTERNAL_LIBS "-L/opt/rocm/lib/ -lhip_hcc")
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -fPIC -DPADDLE_WITH_HIP -std=c++14" )
if(WITH_DSO)
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_USE_DSO")
endif(WITH_DSO)
if(WITH_DOUBLE)
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_TYPE_DOUBLE")
endif(WITH_DOUBLE)
if(WITH_TESTING)
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITH_TESTING")
endif(WITH_TESTING)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG})
elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
# Disable optimization since one eigen symbol will be removed in math_function.cu
#list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO})
elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_MINSIZEREL})
endif()
if("x${HCC_HOME}" STREQUAL "x")
set(HCC_HOME "/opt/rocm/hcc")
endif()
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -shared")
set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -shared")

@ -76,6 +76,9 @@ function(op_library TARGET)
if (WITH_GPU)
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
elseif (WITH_AMD_GPU)
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS
${op_library_DEPS} ${op_common_deps})
else()
cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
${op_common_deps})

@ -6,6 +6,7 @@ function(math_library TARGET)
# But it handle split GPU/CPU code and link some common library.
set(cc_srcs)
set(cu_srcs)
set(hip_srcs)
set(math_common_deps device_context framework_proto)
set(multiValueArgs DEPS)
cmake_parse_arguments(math_library "${options}" "${oneValueArgs}"
@ -17,10 +18,15 @@ function(math_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
list(APPEND cu_srcs ${TARGET}.cu)
endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu)
list(APPEND hip_srcs ${TARGET}.hip.cu)
endif()
list(LENGTH cc_srcs cc_srcs_len)
if (WITH_GPU)
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
elseif (WITH_AMD_GPU)
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
elseif(${cc_srcs_len} GREATER 0)
cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
endif()

File diff suppressed because it is too large Load Diff

@ -1,9 +1,16 @@
if(WITH_PYTHON)
cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method
${GLOB_OP_LIB})
if(NOT APPLE AND NOT ANDROID)
target_link_libraries(paddle_pybind rt)
endif(NOT APPLE AND NOT ANDROID)
if(WITH_AMD_GPU)
hip_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc
DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method
${GLOB_OP_LIB})
else()
cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc
DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method
${GLOB_OP_LIB})
if(NOT APPLE AND NOT ANDROID)
target_link_libraries(paddle_pybind rt)
endif(NOT APPLE AND NOT ANDROID)
endif(WITH_AMD_GPU)
endif(WITH_PYTHON)

@ -37,6 +37,7 @@ function cmake_gen() {
-DWITH_DSO=ON
-DWITH_DOC=OFF
-DWITH_GPU=${WITH_GPU:-OFF}
-DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF}
-DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF}
-DWITH_MKL=${WITH_MKL:-ON}
-DWITH_AVX=${WITH_AVX:-OFF}
@ -50,6 +51,7 @@ function cmake_gen() {
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON}
-DWITH_TESTING=${WITH_TESTING:-ON}
-DWITH_FAST_BUNDLE_TEST=ON
-DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
========================================
EOF
@ -62,6 +64,7 @@ EOF
-DWITH_DSO=ON \
-DWITH_DOC=OFF \
-DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF} \
-DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} \
-DWITH_MKL=${WITH_MKL:-ON} \
-DWITH_AVX=${WITH_AVX:-OFF} \
@ -74,6 +77,7 @@ EOF
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} \
-DWITH_TESTING=${WITH_TESTING:-ON} \
-DWITH_FAST_BUNDLE_TEST=ON \
-DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
}

Loading…
Cancel
Save