add mkl,ctc support for windows

revert-15207-remove_op_handle_lock_and_fix_var
peizhilin 6 years ago
parent 0f085f0a5a
commit 5a6d7fe2ff

@ -125,16 +125,12 @@ if(ANDROID OR IOS)
add_definitions(-DPADDLE_MOBILE_INFERENCE)
endif()
if (APPLE OR WIN32)
if (APPLE)
set(WITH_MKL OFF CACHE STRING
"Disable MKL for building on mac and windows" FORCE)
"Disable MKL for building on mac" FORCE)
endif()
if (WIN32)
set(WITH_DSO OFF CACHE STRING
"Disable DSO when compiling for Windows" FORCE)
set(WITH_MKL OFF CACHE STRING
"Disable MKL when compiling for Windows" FORCE)
set(WITH_DISTRIBUTE OFF CACHE STRING
"Disable DISTRIBUTE when compiling for Windows" FORCE)
set(WITH_C_API OFF CACHE STRING
@ -207,10 +203,10 @@ include(external/xxhash) # download xxhash
include(external/dlpack)
include(external/snappy) # download snappy
include(external/snappystream) # download snappystream
include(external/warpctc) # download, build, install warpctc
if (NOT WIN32)
# there is no official support of warpctc, nccl, cupti in windows
include(external/warpctc) # download, build, install warpctc
# there is no official support of nccl, cupti in windows
include(cupti)
include(external/gzstream)
endif (NOT WIN32)

@ -139,10 +139,12 @@ endfunction()
message(STATUS "CUDA detected: " ${CUDA_VERSION})
if (${CUDA_VERSION} LESS 7.0)
set(paddle_known_gpu_archs ${paddle_known_gpu_archs})
add_definitions("-DPADDLE_CUDA_BINVER=\"60\"")
elseif (${CUDA_VERSION} LESS 8.0) # CUDA 7.x
set(paddle_known_gpu_archs ${paddle_known_gpu_archs7})
list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
add_definitions("-DPADDLE_CUDA_BINVER=\"70\"")
elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
set(paddle_known_gpu_archs ${paddle_known_gpu_archs8})
list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
@ -150,6 +152,7 @@ elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
# CUDA 8 may complain that sm_20 is no longer supported. Suppress the
# warning for now.
list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets")
add_definitions("-DPADDLE_CUDA_BINVER=\"80\"")
endif()
include_directories(${CUDA_INCLUDE_DIRS})

@ -89,6 +89,7 @@ if(CUDNN_FOUND)
if(NOT CUDNN_MAJOR_VERSION)
set(CUDNN_VERSION "???")
else()
add_definitions("-DPADDLE_CUDNN_BINVER=\"${CUDNN_MAJOR_VERSION}\"")
math(EXPR CUDNN_VERSION
"${CUDNN_MAJOR_VERSION} * 1000 +
${CUDNN_MINOR_VERSION} * 100 + ${CUDNN_PATCHLEVEL_VERSION}")

@ -32,4 +32,4 @@ endif()
add_dependencies(cub extern_cub)
LIST(APPEND externl_project_dependencies cub)
LIST(APPEND external_project_dependencies cub)

@ -28,4 +28,4 @@ endif()
add_dependencies(dlpack extern_dlpack)
LIST(APPEND externl_project_dependencies dlpack)
LIST(APPEND external_project_dependencies dlpack)

@ -23,15 +23,14 @@ SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn)
SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
IF(WIN32 OR APPLE)
IF(APPLE)
MESSAGE(WARNING
"Windows or Mac is not supported with MKLDNN in Paddle yet."
"Mac is not supported with MKLDNN in Paddle yet."
"Force WITH_MKLDNN=OFF")
SET(WITH_MKLDNN OFF CACHE STRING "Disable MKLDNN in Windows and MacOS" FORCE)
SET(WITH_MKLDNN OFF CACHE STRING "Disable MKLDNN in MacOS" FORCE)
return()
ENDIF()
SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE)
MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path")
SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib")
@ -44,10 +43,14 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
ELSE()
MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN")
ENDIF()
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds")
SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
IF(NOT WIN32)
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds")
SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
ENDIF(NOT WIN32)
ExternalProject_Add(
${MKLDNN_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
@ -58,8 +61,15 @@ ExternalProject_Add(
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
CMAKE_ARGS -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
CMAKE_ARGS -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
CMAKE_ARGS -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
CMAKE_ARGS -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DMKLROOT=${MKLML_ROOT}
CMAKE_ARGS -DCMAKE_C_FLAGS=${MKLDNN_CFLAG}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG}
@ -67,6 +77,11 @@ ExternalProject_Add(
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
-DMKLROOT:PATH=${MKLML_ROOT}
)
if(WIN32)
SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/mkldnn.lib" CACHE FILEPATH "mkldnn library." FORCE)
else(WIN32)
SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE)
endif(WIN32)
ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
@ -85,10 +100,14 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
# copy the real so.0 lib to install dir
# it can be directly contained in wheel or capi
SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0)
ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB}
COMMAND cp ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB}
DEPENDS mkldnn)
if(WIN32)
SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/lib/mkldnn.dll)
else(WIN32)
SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0)
ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB}
COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB}
DEPENDS mkldnn)
endif(WIN32)
ADD_CUSTOM_TARGET(mkldnn_shared_lib ALL DEPENDS ${MKLDNN_SHARED_LIB})
IF(WITH_C_API)

@ -16,59 +16,76 @@ IF(NOT ${WITH_MKLML})
return()
ENDIF(NOT ${WITH_MKLML})
IF(WIN32 OR APPLE)
IF(APPLE)
MESSAGE(WARNING
"Windows or Mac is not supported with MKLML in Paddle yet."
"Mac is not supported with MKLML in Paddle yet."
"Force WITH_MKLML=OFF")
SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE)
return()
ENDIF()
INCLUDE(ExternalProject)
SET(MKLML_PROJECT "extern_mklml")
IF((NOT DEFINED MKLML_VER) OR (NOT DEFINED MKLML_URL))
MESSAGE(STATUS "use pre defined download url")
SET(MKLML_VER "mklml_lnx_2019.0.20180710" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
ENDIF()
MESSAGE(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}")
SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml")
SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")
SET(MKLML_DST_DIR "mklml")
SET(MKLML_INSTALL_ROOT "${THIRD_PARTY_PATH}/install")
SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR})
SET(MKLML_ROOT ${MKLML_INSTALL_DIR})
SET(MKLML_INC_DIR ${MKLML_ROOT}/include)
SET(MKLML_LIB_DIR ${MKLML_ROOT}/lib)
SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
if(WIN32)
SET(MKLML_LIB ${MKLML_LIB_DIR}/mklml.lib)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll)
SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll)
else()
SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
endif()
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
INCLUDE_DIRECTORIES(${MKLML_INC_DIR})
if(WIN32)
MESSAGE(WARNING
"Please download the MKLML and and put it at " ${THIRD_PARTY_PATH}/install/mklml)
else()
SET(MKLML_PROJECT "extern_mklml")
IF((NOT DEFINED MKLML_VER) OR (NOT DEFINED MKLML_URL))
MESSAGE(STATUS "use pre defined download url")
SET(MKLML_VER "mklml_lnx_2019.0.20180710" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
ENDIF()
MESSAGE(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}")
SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml")
SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")
FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt
"PROJECT(MKLML)\n"
"cmake_minimum_required(VERSION 3.0)\n"
"install(DIRECTORY ${MKLML_VER}/include ${MKLML_VER}/lib \n"
" DESTINATION ${MKLML_DST_DIR})\n")
FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt
"PROJECT(MKLML)\n"
"cmake_minimum_required(VERSION 3.0)\n"
"install(DIRECTORY ${MKLML_VER}/include ${MKLML_VER}/lib \n"
" DESTINATION ${MKLML_DST_DIR})\n")
ExternalProject_Add(
${MKLML_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${MKLML_SOURCE_DIR}
DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR}
DOWNLOAD_COMMAND wget --no-check-certificate ${MKLML_URL} -c -q -O ${MKLML_VER}.tgz
&& tar zxf ${MKLML_VER}.tgz
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT}
)
ExternalProject_Add(
${MKLML_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${MKLML_SOURCE_DIR}
DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR}
DOWNLOAD_COMMAND wget --no-check-certificate ${MKLML_URL} -c -q -O ${MKLML_VER}.tgz
&& tar zxf ${MKLML_VER}.tgz
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT}
)
endif()
INCLUDE_DIRECTORIES(${MKLML_INC_DIR})
ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB})
ADD_DEPENDENCIES(mklml ${MKLML_PROJECT})
if(NOT WIN32)
ADD_DEPENDENCIES(mklml ${MKLML_PROJECT})
endif()
LIST(APPEND external_project_dependencies mklml)
IF(WITH_C_API)

@ -23,9 +23,12 @@ FIND_PACKAGE(PythonLibs ${PY_VERSION})
if(WIN32)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"from distutils import sysconfig as s;import sys;import struct;
"from distutils import sysconfig as s;import sys;import struct;import sysconfig;
print(sys.prefix);
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
print(sysconfig.get_platform());
print(sysconfig.get_config_var('py_version_nodot'));
print(sysconfig.get_config_var('SOABI'));
"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE _PYTHON_VALUES
@ -41,6 +44,9 @@ print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
list(GET _PYTHON_VALUES 0 PYTHON_PREFIX)
list(GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX)
list(GET _PYTHON_VALUES 2 SYS_PLATFORM)
list(GET _PYTHON_VALUES 3 PYTHON_SHORT_VERSION_NODOT)
list(GET _PYTHON_VALUES 4 PYTHON_SOABI)
# Make sure all directory separators are '/'
string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})

@ -26,25 +26,33 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
# Used in unit test test_WarpCTCLayer
SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib"
CACHE PATH "Warp-ctc Library Directory" FORCE)
SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
CACHE FILEPATH "Warp-ctc Library" FORCE)
IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" )
IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR WIN32)
SET(USE_OMP OFF)
ELSE()
SET(USE_OMP ON)
ENDIF()
IF(WIN32)
SET(WARPCTC_REPOSITORY "https://github.com/wopeizl/warp-ctc.git")
ELSE()
SET(WARPCTC_REPOSITORY "https://github.com/dzhwinter/warp-ctc.git")
ENDIF()
ExternalProject_Add(
extern_warpctc
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/dzhwinter/warp-ctc.git"
GIT_REPOSITORY ${WARPCTC_REPOSITORY}
PREFIX ${WARPCTC_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
-DWITH_GPU=${WITH_GPU}
-DWITH_OMP=${USE_OMP}
@ -59,6 +67,18 @@ ExternalProject_Add(
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
)
IF(WIN32)
IF(NOT EXISTS "${WARPCTC_INSTALL_DIR}/lib/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}")
add_custom_command(TARGET extern_warpctc POST_BUILD
COMMAND cmake -E copy ${WARPCTC_INSTALL_DIR}/bin/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX} ${WARPCTC_INSTALL_DIR}/lib/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}
)
ENDIF()
SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
CACHE FILEPATH "Warp-ctc Library" FORCE)
else(WIN32)
SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
CACHE FILEPATH "Warp-ctc Library" FORCE)
ENDIF(WIN32)
MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}")
INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) # For warpctc code to include its headers.

@ -13,8 +13,8 @@
# limitations under the License.
set(WITH_XBYAK ON)
if(WIN32 OR APPLE)
SET(WITH_XBYAK OFF CACHE STRING "Disable XBYAK in Windows and MacOS" FORCE)
if(APPLE)
SET(WITH_XBYAK OFF CACHE STRING "Disable XBYAK in MacOS" FORCE)
return()
endif()

@ -267,7 +267,11 @@ function(cc_library TARGET_NAME)
list(APPEND cc_library_DEPS dynload_mklml)
endif()
add_dependencies(${TARGET_NAME} mklml)
target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
if(WIN32)
target_link_libraries(${TARGET_NAME} ${MKLML_IOMP_LIB})
else(WIN32)
target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
endif(WIN32)
endif()
# remove link to python, see notes at:
# https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually

@ -115,20 +115,20 @@ if (NOT PROTOBUF_FOUND OR WIN32)
)
endif ()
if (NOT CBLAS_FOUND)
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/openblas")
copy(openblas_lib
SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include
DSTS ${dst_dir} ${dst_dir}
DEPS extern_openblas
)
elseif (WITH_MKLML)
if (WITH_MKLML)
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mklml")
copy(mklml_lib
SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_INC_DIR}
DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir}
DEPS mklml
)
elseif (NOT CBLAS_FOUND OR WIN32)
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/openblas")
copy(openblas_lib
SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include
DSTS ${dst_dir} ${dst_dir}
DEPS extern_openblas
)
endif ()
if (WITH_MKLDNN)

@ -84,7 +84,7 @@ function(op_library TARGET)
endif()
if (WIN32)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op" "warpctc_op")
foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op")
if ("${TARGET}" STREQUAL "${windows_unsupport_op}")
return()
endif()

@ -57,46 +57,43 @@ int main()
return 0;
}" SSE3_FOUND)
# disable AVX by default on windows
if(NOT WIN32)
# Check AVX
set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}" AVX_FOUND)
# Check AVX
set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}" AVX_FOUND)
# Check AVX 2
set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}" AVX2_FOUND)
# Check AVX 2
set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}" AVX2_FOUND)
# Check AVX512F
set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}" AVX512F_FOUND)
endif(NOT WIN32)
# Check AVX512F
set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}" AVX512F_FOUND)
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND)

@ -15,8 +15,7 @@ function(windows_symbolic TARGET)
file(GENERATE OUTPUT ${final_path}/.${src}.cu INPUT ${final_path}/${src}.cc)
add_custom_command(OUTPUT ${final_path}/.${src}.cu
COMMAND ${CMAKE_COMMAND} -E remove ${final_path}/.${src}.cu
COMMAND ${CMAKE_COMMAND} -E copy "${final_path}/${src}.cc" "${final_path}/.${src}.cu"
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${final_path}/${src}.cc" "${final_path}/.${src}.cu"
COMMENT "create hidden file of ${src}.cu")
add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
endforeach()

@ -50,7 +50,7 @@ void AllReduceOpHandle::RunImpl() {
// FIXME(typhoonzero): If scope0(global scope) have NCCL_ID_VAR,
// this is a distributed or inter-process call, find a better way.
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if (NoDummyInputSize() == 1 &&
local_scopes_[0]->FindLocalVar(NCCL_ID_VARNAME) == nullptr) {
#else

@ -215,8 +215,8 @@ class Vector {
auto stream = dev_ctx->stream();
void *src = gpu_->ptr();
void *dst = cpu_.data();
memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
gpu_->size(), stream);
paddle::memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
gpu_->size(), stream);
dev_ctx->Wait();
}
@ -261,8 +261,8 @@ class Vector {
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(place));
auto stream = dev_ctx->stream();
memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
gpu_->size(), stream);
paddle::memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
gpu_->size(), stream);
}
void ImmutableCPU() const {
@ -284,7 +284,7 @@ class Vector {
bool IsInCPU() const { return flag_ & kDataInCPU; }
mutable std::vector<T> cpu_;
mutable memory::AllocationPtr gpu_;
mutable paddle::memory::AllocationPtr gpu_;
mutable int flag_;
mutable std::mutex mtx_;

@ -23,7 +23,8 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_set>
#include "glog/logging.h" // For VLOG()
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
#include "paddle/fluid/framework/framework.pb.h"

@ -89,12 +89,21 @@ endif()
if(WITH_MKL)
include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
if(NOT WIN32)
set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
else(WIN32)
set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml${CMAKE_SHARED_LIBRARY_SUFFIX}
${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5md${CMAKE_SHARED_LIBRARY_SUFFIX})
endif(WIN32)
set(MKLDNN_PATH "${PADDLE_LIB}/third_party/install/mkldnn")
if(EXISTS ${MKLDNN_PATH})
include_directories("${MKLDNN_PATH}/include")
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
if(WIN32)
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
else(WIN32)
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
endif(WIN32)
endif()
else()
set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})

@ -17,7 +17,6 @@ limitations under the License. */
#ifdef _WIN32
#include <malloc.h>
#include <windows.h> // VirtualLock/VirtualUnlock
#else
#include <sys/mman.h> // for mlock and munlock
#endif

@ -44,9 +44,8 @@ endif()
register_operators(EXCLUDES warpctc_op conv_fusion_op DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})
# warpctc_op needs cudnn 7 above
if (WITH_GPU AND NOT WIN32)
if (WITH_GPU)
if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc)
else()
@ -64,9 +63,7 @@ endif()
set(COMMON_OP_DEPS ${OP_HEADER_DEPS})
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor)
if (NOT WIN32)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc)
endif()
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions)
if (WITH_GPU)

@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <array>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"

@ -19,6 +19,9 @@ limitations under the License. */
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/operators/math/jit_kernel.h"
#if defined(_WIN32) && defined(_WINSOCKAPI_)
#define _WINSOCK2API_ /* Prevent inclusion of winsock2.h */
#endif
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"

@ -17,6 +17,12 @@ limitations under the License. */
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/lstm_compute.h"
#if defined(_WIN32)
#if defined(__AVX2__) || defined(__AVX__)
inline __m256 operator+=(__m256 a, __m256 b) { return _mm256_add_ps(a, b); }
#endif
#endif
namespace paddle {
namespace operators {
namespace math {

@ -18,6 +18,9 @@ limitations under the License. */
#include <type_traits>
#include "paddle/fluid/platform/macros.h"
#if defined(_WIN32) && defined(_WINSOCKAPI_)
#define _WINSOCK2API_ /* Prevent inclusion of winsock2.h */
#endif
#define XBYAK_USE_MMAP_ALLOCATOR
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save