Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_sequence_padding_op

createGenDocLib
fengjiayi · 7 years ago · commit 7e0c9f50ae

.gitignore

@@ -5,6 +5,7 @@ python/paddle/v2/fluid/tests/book/image_classification_resnet.inference.model/
 python/paddle/v2/fluid/tests/book/image_classification_vgg.inference.model/
 python/paddle/v2/fluid/tests/book/label_semantic_roles.inference.model/
 *.DS_Store
+*.vs
 build/
 build_doc/
 *.user
@@ -15,6 +16,7 @@ build_doc/
 .cproject
 .pydevproject
 .settings/
+CMakeSettings.json
 Makefile
 .test_env/
 third_party/

@@ -24,6 +24,9 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
         "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
 message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
         "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
+if(WIN32)
+    set(CMAKE_STATIC_LIBRARY_PREFIX lib)
+endif(WIN32)
 
 if(NOT CMAKE_CROSSCOMPILING)
     find_package(CUDA QUIET)
@@ -138,12 +141,6 @@ else()
     set(THIRD_PARTY_BUILD_TYPE Release)
 endif()
 
-if(WITH_MKL)
-    option(MKL_SPLIT_GEMM "PaddlePaddle MKL gemm would split to small ones" OFF)
-    if (MKL_SPLIT_GEMM)
-        add_definitions(-DPADDLE_MKL_SPLIT_GEMM)
-    endif()
-endif()
 set(WITH_MKLML ${WITH_MKL})
 if (NOT DEFINED WITH_MKLDNN)
     if (WITH_MKL AND AVX2_FOUND)
@@ -171,7 +168,6 @@ include(external/python)    # download, build, install python
 include(external/openblas)  # download, build, install openblas
 include(external/mkldnn)    # download, build, install mkldnn
 include(external/swig)      # download, build, install swig
-include(external/warpctc)   # download, build, install warpctc
 include(external/boost)     # download boost
 include(external/any)       # download libn::any
 include(external/eigen)     # download eigen3
@@ -179,6 +175,14 @@ include(external/pybind11)  # download pybind11
 include(external/cares)
 include(external/cub)
 
+if (NOT WIN32)
+# there is no official support of snappystream, warpctc, nccl, cupti in windows
+include(external/snappy)        # download snappy
+include(external/snappystream)  # download snappystream
+include(external/warpctc)       # download, build, install warpctc
+include(cupti)
+endif (NOT WIN32)
+
 if(WITH_DISTRIBUTE)
     if(WITH_GRPC)
         include(external/grpc)
@@ -200,26 +204,25 @@ if(WITH_BRPC_RDMA)
     endif()
 endif()
 
-include(external/snappy)      # download snappy
-include(external/snappystream)
 include(external/threadpool)
+include(flags)                # set paddle compile flags
+include(cudnn)                # set cudnn libraries, must before configure
+include(configure)            # add paddle env configuration
 
+set(WITH_ANAKIN OFF CACHE STRING "Disable Anakin first, will add it later." FORCE)
 if(WITH_GPU)
     include(cuda)
     include(tensorrt)
     include(external/anakin)
-elseif()
-    set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in GPU only now." FORCE)
 endif()
-include(cudnn)                # set cudnn libraries, must before configure
-include(cupti)
-include(configure)            # add paddle env configuration
 include(generic)              # simplify cmake module
 include(package)              # set paddle packages
 include(ccache)               # set ccache for compilation
 include(util)                 # set unittest and link libs
 include(rdma)                 # set rdma libraries
-include(flags)                # set paddle compile flags
 include(version)              # set PADDLE_VERSION
 include(coveralls)            # set code coverage
 include(inference_lib)        # add paddle fluid inference libraries

@@ -50,13 +50,22 @@ if(NOT WITH_PROFILER)
 endif(NOT WITH_PROFILER)
 
 if(NOT CMAKE_CROSSCOMPILING)
-    if(WITH_AVX AND AVX_FOUND)
+    if(WITH_AVX AND AVX512F_FOUND)
+        set(SIMD_FLAG ${AVX512F_FLAG})
+    elseif(WITH_AVX AND AVX2_FOUND)
+        set(SIMD_FLAG ${AVX2_FLAG})
+    elseif(WITH_AVX AND AVX_FOUND)
         set(SIMD_FLAG ${AVX_FLAG})
     elseif(SSE3_FOUND)
        set(SIMD_FLAG ${SSE3_FLAG})
     endif()
 endif()
 
+if(WIN32)
+    # windows stupid compile option for all targets.
+    add_definitions(-D_XKEYCHECK_H)
+endif(WIN32)
+
 if(NOT WITH_GOLANG)
     add_definitions(-DPADDLE_WITHOUT_GOLANG)
 endif(NOT WITH_GOLANG)
@@ -99,12 +108,21 @@ if(WITH_GPU)
     endif()
     if(WITH_ANAKIN)
         if(${CUDA_VERSION_MAJOR} VERSION_LESS 8)
-            message(FATAL_ERROR "Anakin needs CUDA >= 8.0 to compile")
+            message(WARNING "Anakin needs CUDA >= 8.0 to compile. Force WITH_ANAKIN=OFF")
+            set(WITH_ANAKIN OFF CACHE STRING "Anakin is valid only when CUDA >= 8.0." FORCE)
         endif()
         if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
-            message(FATAL_ERROR "Anakin needs CUDNN >= 7.0 to compile")
+            message(WARNING "Anakin needs CUDNN >= 7.0 to compile. Force WITH_ANAKIN=OFF")
+            set(WITH_ANAKIN OFF CACHE STRING "Anakin is valid only when CUDNN >= 7.0." FORCE)
         endif()
     endif()
+    if(WITH_ANAKIN)
+        # NOTICE(minqiyang): the end slash is important because $CUDNN_INCLUDE_DIR
+        # is a softlink to real cudnn.h directory
+        set(ENV{CUDNN_INCLUDE_DIR} "${CUDNN_INCLUDE_DIR}/")
+        get_filename_component(CUDNN_LIBRARY_DIR ${CUDNN_LIBRARY} DIRECTORY)
+        set(ENV{CUDNN_LIBRARY} ${CUDNN_LIBRARY_DIR})
+    endif()
 elseif(WITH_AMD_GPU)
     add_definitions(-DPADDLE_WITH_HIP)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")

@@ -25,8 +25,25 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS
     $ENV{CUDNN_ROOT}
     $ENV{CUDNN_ROOT}/lib64
     $ENV{CUDNN_ROOT}/lib
-    /usr/lib)
-find_library(CUDNN_LIBRARY NAMES libcudnn.so libcudnn.dylib # libcudnn_static.a
+    /usr/lib
+    ${CUDA_TOOLKIT_ROOT_DIR}
+    ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
+    )
+
+set(CUDNN_LIB_NAME "")
+if (LINUX)
+    set(CUDNN_LIB_NAME "libcudnn.so")
+endif(LINUX)
+
+if(WIN32)
+    # only support cudnn7
+    set(CUDNN_LIB_NAME "cudnn.lib" "cudnn64_7.dll")
+endif(WIN32)
+
+if(Apple)
+    set(CUDNN_LIB_NAME "libcudnn.dylib" "libcudnn.so")
+endif(Apple)
+
+find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME} # libcudnn_static.a
     PATHS ${CUDNN_CHECK_LIBRARY_DIRS} ${CUDNN_INCLUDE_DIR} ${__libpath_hist}
           NO_DEFAULT_PATH
     DOC "Path to cuDNN library.")

@@ -2,6 +2,11 @@ if (NOT WITH_ANAKIN)
   return()
 endif()
 
+option(ANAKIN_ENABLE_OP_TIMER "Get more detailed information with Anakin op time" OFF)
+if(ANAKIN_ENABLE_OP_TIMER)
+  add_definitions(-DPADDLE_ANAKIN_ENABLE_OP_TIMER)
+endif()
+
 INCLUDE(ExternalProject)
 set(ANAKIN_SOURCE_DIR ${THIRD_PARTY_PATH}/anakin)
 # the anakin install dir is only default one now
@@ -11,33 +16,45 @@ set(ANAKIN_LIBRARY ${ANAKIN_INSTALL_DIR})
 set(ANAKIN_SHARED_LIB ${ANAKIN_LIBRARY}/libanakin.so)
 set(ANAKIN_SABER_LIB ${ANAKIN_LIBRARY}/libanakin_saber_common.so)
 
-# TODO(luotao): ANAKIN_MODLE_URL will move to demo ci later.
-set(ANAKIN_MODLE_URL "http://paddle-inference-dist.bj.bcebos.com/mobilenet_v2.anakin.bin")
+# TODO(luotao): ANAKIN_MODLE_URL etc will move to demo ci later.
+set(INFERENCE_URL "http://paddle-inference-dist.bj.bcebos.com")
+set(ANAKIN_MODLE_URL "${INFERENCE_URL}/mobilenet_v2.anakin.bin")
+set(ANAKIN_RNN_MODLE_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn.anakin2.model.bin")
+set(ANAKIN_RNN_DATA_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn_data.txt")
 execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_SOURCE_DIR}")
-execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_MODLE_URL}")
+execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_MODLE_URL} -N")
+execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_MODLE_URL} -N")
+execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_DATA_URL} -N")
 
 include_directories(${ANAKIN_INCLUDE})
 include_directories(${ANAKIN_INCLUDE}/saber/)
+include_directories(${ANAKIN_INCLUDE}/saber/core/)
+include_directories(${ANAKIN_INCLUDE}/saber/funcs/impl/x86/)
+include_directories(${ANAKIN_INCLUDE}/saber/funcs/impl/cuda/base/cuda_c/)
 
 set(ANAKIN_COMPILE_EXTRA_FLAGS
     -Wno-error=unused-but-set-variable -Wno-unused-but-set-variable
     -Wno-error=unused-variable -Wno-unused-variable
     -Wno-error=format-extra-args -Wno-format-extra-args
     -Wno-error=comment -Wno-comment
     -Wno-error=format -Wno-format
+    -Wno-error=maybe-uninitialized -Wno-maybe-uninitialized
     -Wno-error=switch -Wno-switch
     -Wno-error=return-type -Wno-return-type
     -Wno-error=non-virtual-dtor -Wno-non-virtual-dtor
+    -Wno-error=ignored-qualifiers
+    -Wno-ignored-qualifiers
     -Wno-sign-compare
     -Wno-reorder
     -Wno-error=cpp)
 
 ExternalProject_Add(
     extern_anakin
     ${EXTERNAL_PROJECT_LOG_ARGS}
-    # TODO(luotao): use PaddlePaddle/Anakin later
+    DEPENDS ${MKLML_PROJECT}
+    # Anakin codes error on Intel(R) Xeon(R) Gold 5117 CPU, temporary do not compile avx512 related code.
     GIT_REPOSITORY "https://github.com/luotao1/Anakin"
-    GIT_TAG "3957ae9263eaa0b1986758dac60a88852afb09be"
+    GIT_TAG "211d1fc5d813d70c0c14072f9083cf25f40940ea"
     PREFIX ${ANAKIN_SOURCE_DIR}
     UPDATE_COMMAND ""
     CMAKE_ARGS -DUSE_GPU_PLACE=YES
@@ -46,6 +63,8 @@ ExternalProject_Add(
     -DPROTOBUF_ROOT=${THIRD_PARTY_PATH}/install/protobuf
     -DMKLML_ROOT=${THIRD_PARTY_PATH}/install/mklml
     -DCUDNN_ROOT=${CUDNN_ROOT}
+    -DCUDNN_INCLUDE_DIR=${CUDNN_INCLUDE_DIR}
+    -DENABLE_OP_TIMER=${ANAKIN_ENABLE_OP_TIMER}
     ${EXTERNAL_OPTIONAL_ARGS}
     CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ANAKIN_INSTALL_DIR}
 )

@@ -28,7 +28,12 @@ if((NOT DEFINED BOOST_TAR) OR (NOT DEFINED BOOST_URL))
     set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE)
     set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
 endif()
-MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")
+IF (WIN32)
+    MESSAGE(WARNING, "In windows, boost can not be downloaded automatically, please build it manually and put it at " ${THIRD_PARTY_PATH}/install/boost)
+else()
+    MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")
+ENDIF(WIN32)
+
 set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
 set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
 set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE)
@@ -36,12 +41,13 @@ set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)
 
 include_directories(${BOOST_INCLUDE_DIR})
 
+if (NOT WIN32)
 ExternalProject_Add(
     ${BOOST_PROJECT}
     ${EXTERNAL_PROJECT_LOG_ARGS}
     DOWNLOAD_DIR          ${BOOST_DOWNLOAD_DIR}
     DOWNLOAD_COMMAND      wget --no-check-certificate ${BOOST_URL} -c -q -O ${BOOST_TAR}.tar.gz
                           && tar zxf ${BOOST_TAR}.tar.gz
     DOWNLOAD_NO_PROGRESS  1
     PREFIX                ${BOOST_SOURCES_DIR}
     CONFIGURE_COMMAND     ""
@@ -49,8 +55,9 @@ ExternalProject_Add(
     INSTALL_COMMAND       ""
     UPDATE_COMMAND        ""
 )
+endif(NOT WIN32)
 
-if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
+if (${CMAKE_VERSION} VERSION_LESS "3.3.0" OR NOT WIN32)
     set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/boost_dummy.c)
     file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
     add_library(boost STATIC ${dummyfile})

@@ -18,7 +18,7 @@ SET(GFLAGS_SOURCES_DIR ${THIRD_PARTY_PATH}/gflags)
 SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags)
 SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE)
 IF(WIN32)
-  set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
+  set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
 ELSE(WIN32)
   set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
 ENDIF(WIN32)
@@ -45,7 +45,13 @@ ExternalProject_Add(
     -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
     -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
 )
+IF(WIN32)
+  IF(NOT EXISTS "${GFLAGS_INSTALL_DIR}/lib/libgflags.lib")
+    add_custom_command(TARGET extern_gflags POST_BUILD
+      COMMAND cmake -E rename ${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib ${GFLAGS_INSTALL_DIR}/lib/libgflags.lib
+    )
+  ENDIF()
+ENDIF(WIN32)
 ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
 ADD_DEPENDENCIES(gflags extern_gflags)

@@ -60,3 +66,4 @@ IF(WITH_C_API)
     INSTALL(FILES ${GFLAGS_LIBRARIES} DESTINATION third_party/gflags/lib)
   ENDIF()
 ENDIF()

@@ -60,6 +60,13 @@ ExternalProject_Add(
     -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
     -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
 )
+IF(WIN32)
+  IF(NOT EXISTS "${GLOG_INSTALL_DIR}/lib/libglog.lib")
+    add_custom_command(TARGET extern_glog POST_BUILD
+      COMMAND cmake -E rename ${GLOG_INSTALL_DIR}/lib/glog.lib ${GLOG_INSTALL_DIR}/lib/libglog.lib
+    )
+  ENDIF()
+ENDIF(WIN32)
 ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES})

@@ -54,7 +54,7 @@ ExternalProject_Add(
     ${EXTERNAL_PROJECT_LOG_ARGS}
     DEPENDS             ${MKLDNN_DEPENDS}
     GIT_REPOSITORY      "https://github.com/01org/mkl-dnn.git"
-    GIT_TAG             "a29d8487a63afca3d5b8c5bbdbb473cf8ccc6e51"
+    GIT_TAG             "64e03a1939e0d526aa8e9f2e3f7dc0ad8d372944"
     PREFIX              ${MKLDNN_SOURCES_DIR}
     UPDATE_COMMAND      ""
     CMAKE_ARGS          -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}

@@ -17,20 +17,29 @@ IF(USE_EIGEN_FOR_BLAS)
 ENDIF(USE_EIGEN_FOR_BLAS)
 
 INCLUDE(cblas)
+# IF(WIN32 AND NOT ${CBLAS_FOUND})
 
 IF(NOT ${CBLAS_FOUND})
     INCLUDE(ExternalProject)
     SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
     SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
-    SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE)
+    SET(CBLAS_INCLUDE_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE)
     SET(CBLAS_LIBRARIES
         "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
         CACHE FILEPATH "openblas library." FORCE)
     ADD_DEFINITIONS(-DPADDLE_USE_OPENBLAS)
+
+    IF (WIN32)
+        SET(CBLAS_FOUND true)
+        MESSAGE(WARNING, "In windows, openblas only supports msvc build, please build it manually and put it at " ${CBLAS_INSTALL_DIR})
+    ENDIF(WIN32)
+
+    IF (NOT WIN32)
     SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable")
     SET(OPENBLAS_COMMIT "v0.2.20")
@@ -69,7 +78,6 @@ IF(NOT ${CBLAS_FOUND})
     ENDIF()
     SET(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs)
 
-
     ExternalProject_Add(
         extern_openblas
         ${EXTERNAL_PROJECT_LOG_ARGS}

@@ -84,9 +92,11 @@ IF(NOT ${CBLAS_FOUND})
         UPDATE_COMMAND      ""
         CONFIGURE_COMMAND   ""
     )
+    ELSE()
+    ENDIF(NOT WIN32)
     SET(CBLAS_PROVIDER openblas)
     IF(WITH_C_API)
-        INSTALL(DIRECTORY ${CBLAS_INC_DIR} DESTINATION third_party/openblas)
+        INSTALL(DIRECTORY ${CBLAS_INCLUDE_DIR} DESTINATION third_party/openblas)
         # Because libopenblas.a is a symbolic link of another library, thus need to
         # install the whole directory.
         IF(ANDROID)

@@ -107,7 +117,8 @@ IF(NOT ${CBLAS_FOUND})
 ENDIF(NOT ${CBLAS_FOUND})
 
 MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}")
-INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
+MESSAGE(STATUS "BLAS Include: ${CBLAS_INCLUDE_DIR}")
+INCLUDE_DIRECTORIES(${CBLAS_INCLUDE_DIR})
 
 # FIXME(gangliao): generate cblas target to track all high performance
 # linear algebra libraries for cc_library(xxx SRCS xxx.c DEPS cblas)

@@ -14,11 +14,14 @@
 
 INCLUDE(ExternalProject)
 # Always invoke `FIND_PACKAGE(Protobuf)` for importing function protobuf_generate_cpp
+IF(NOT WIN32)
 FIND_PACKAGE(Protobuf QUIET)
+ENDIF(NOT WIN32)
 macro(UNSET_VAR VAR_NAME)
     UNSET(${VAR_NAME} CACHE)
     UNSET(${VAR_NAME})
 endmacro()
 UNSET_VAR(PROTOBUF_INCLUDE_DIR)
 UNSET_VAR(PROTOBUF_FOUND)
 UNSET_VAR(PROTOBUF_PROTOC_EXECUTABLE)
@@ -94,12 +97,14 @@ macro(PROMPT_PROTOBUF_LIB)
     SET(protobuf_DEPS ${ARGN})
 
     MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}")
+    MESSAGE(STATUS "Protobuf-lite library: ${PROTOBUF_LITE_LIBRARY}")
     MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}")
+    MESSAGE(STATUS "Protoc library: ${PROTOBUF_PROTOC_LIBRARY}")
     MESSAGE(STATUS "Protobuf version: ${PROTOBUF_VERSION}")
     INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
 
     # Assuming that all the protobuf libraries are of the same type.
-    IF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_STATIC_LIBRARY_SUFFIX}$")
+    IF(${PROTOBUF_LIBRARY} MATCHES ${CMAKE_STATIC_LIBRARY_SUFFIX})
         SET(protobuf_LIBTYPE STATIC)
     ELSEIF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_SHARED_LIBRARY_SUFFIX}$")
         SET(protobuf_LIBTYPE SHARED)
@@ -137,18 +142,25 @@ macro(SET_PROTOBUF_VERSION)
 endmacro()
 
 set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf")
+IF (WIN32)
+    SET(PROTOBUF_ROOT ${THIRD_PARTY_PATH}/install/protobuf)
+    MESSAGE(WARNING, "In windows, protobuf only supports msvc build, please build it manually and put it at " ${PROTOBUF_ROOT})
+ENDIF(WIN32)
+
 if (NOT "${PROTOBUF_ROOT}" STREQUAL "")
     find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include NO_DEFAULT_PATH)
-    find_library(PROTOBUF_LIBRARY protobuf PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
-    find_library(PROTOBUF_LITE_LIBRARY protobuf-lite PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
-    find_library(PROTOBUF_PROTOC_LIBRARY protoc PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
+    find_library(PROTOBUF_LIBRARY protobuf libprotobuf.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
+    find_library(PROTOBUF_LITE_LIBRARY protobuf-lite libprotobuf-lite.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
+    find_library(PROTOBUF_PROTOC_LIBRARY protoc libprotoc.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
     find_program(PROTOBUF_PROTOC_EXECUTABLE protoc PATHS ${PROTOBUF_ROOT}/bin NO_DEFAULT_PATH)
     if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOBUF_LITE_LIBRARY AND PROTOBUF_PROTOC_LIBRARY AND PROTOBUF_PROTOC_EXECUTABLE)
         message(STATUS "Using custom protobuf library in ${PROTOBUF_ROOT}.")
+        SET(PROTOBUF_FOUND true)
         SET_PROTOBUF_VERSION()
         PROMPT_PROTOBUF_LIB()
     else()
-        message(WARNING "Cannot find protobuf library in ${PROTOBUF_ROOT}.")
+        message(WARNING "Cannot find protobuf library in ${PROTOBUF_ROOT}")
     endif()
 endif()
@@ -239,6 +251,7 @@ IF(CMAKE_CROSSCOMPILING)
             CACHE FILEPATH "protobuf executable." FORCE)
 ENDIF()
 
+
 IF(NOT PROTOBUF_FOUND)
     build_protobuf(extern_protobuf FALSE)

@@ -102,7 +102,6 @@ set(COMMON_FLAGS
     -fno-omit-frame-pointer
     -Wall
     -Wextra
-    -Werror
     -Wnon-virtual-dtor
     -Wdelete-non-virtual-dtor
     -Wno-unused-parameter
@@ -115,6 +114,11 @@ set(COMMON_FLAGS
     -Wno-error=terminate  # Warning in PADDLE_ENFORCE
 )
 
+# https://github.com/PaddlePaddle/Paddle/issues/12773
+if (NOT WIN32)
+    list(APPEND COMMON_FLAGS -Werror)
+endif()
+
 set(GPU_COMMON_FLAGS
     -fPIC
     -fno-omit-frame-pointer
@@ -142,6 +146,11 @@ else()
         ${GPU_COMMON_FLAGS})
 endif()
 
+if(UNIX AND NOT APPLE)
+    # except apple from nix*Os family
+    set(LINUX TRUE)
+endif(UNIX AND NOT APPLE)
+
 foreach(flag ${COMMON_FLAGS})
     safe_set_cflag(CMAKE_C_FLAGS ${flag})

@@ -148,7 +148,8 @@ function(merge_static_libs TARGET_NAME)
       COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
      COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
       )
-  else() # general UNIX: use "ar" to extract objects and re-add to a common lib
+  endif(APPLE)
+  if(LINUX) # general UNIX: use "ar" to extract objects and re-add to a common lib
     set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)
 
     foreach(lib ${libs})
@@ -187,7 +188,36 @@ function(merge_static_libs TARGET_NAME)
         COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
         COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
         WORKING_DIRECTORY ${target_DIR})
-  endif()
+  endif(LINUX)
+  if(WIN32) # windows does not support gcc/nvcc combined compiling; use msvc's lib.exe to merge libs
+    # Make the generated dummy source file depend on all static input
+    # libs. If an input lib changes, the source file is touched,
+    # which causes the desired effect (relink).
+    add_custom_command(OUTPUT ${target_SRCS}
+      COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
+      DEPENDS ${libs})
+
+    # Generate a dummy static lib
+    file(WRITE ${target_SRCS} "const char *dummy_${TARGET_NAME} = \"${target_SRCS}\";")
+    add_library(${TARGET_NAME} STATIC ${target_SRCS})
+    target_link_libraries(${TARGET_NAME} ${libs_deps})
+
+    foreach(lib ${libs})
+      # Get the file names of the libraries to be merged
+      #if(NOT $<TARGET_FILE:${lib}> MATCHES "lib.*\\.lib")
+      #  message("library" ${lib})
+      #  set(libfiles ${libfiles} lib$<TARGET_FILE:${lib}>)
+      #else()
+      set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
+      #endif()
+    endforeach()
+
+    # windows cmd returns an error in a clean env:
+    #   COMMAND del "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
+    add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
+      COMMAND lib /OUT:${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.lib ${libfiles}
+      )
+  endif(WIN32)
 endfunction(merge_static_libs)
 
 function(cc_library TARGET_NAME)
@@ -195,6 +225,10 @@ function(cc_library TARGET_NAME)
   set(oneValueArgs "")
   set(multiValueArgs SRCS DEPS)
   cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  if(WIN32)
+      # add the libxxx.lib prefix on windows
+      set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}")
+  endif(WIN32)
   if(cc_library_SRCS)
     if(cc_library_SHARED OR cc_library_shared) # build *.so
       add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})

@@ -101,6 +101,7 @@ if(WITH_MKLDNN)
   )
 endif()
 
+if (NOT WIN32)
 if(NOT MOBILE_INFERENCE AND NOT RPI)
   set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy")
   copy(snappy_lib
@@ -120,15 +121,23 @@ if(NOT MOBILE_INFERENCE AND NOT RPI)
     DSTS ${dst_dir} ${dst_dir}/lib
     DEPS zlib)
 endif()
+endif(NOT WIN32)
 
 # paddle fluid module
 set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
 set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid")
 set(module "framework")
+if (NOT WIN32)
 copy(framework_lib DEPS framework_py_proto
   SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
   DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module}
 )
+else()
+copy(framework_lib
+  SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
+  DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module}
+)
+endif(NOT WIN32)
 
 set(module "memory")
 copy(memory_lib

@@ -10,6 +10,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID
     set(SSE3_FLAG "-msse3")
     set(AVX_FLAG "-mavx")
     set(AVX2_FLAG "-mavx2")
+    set(AVX512F_FLAG "-mavx512f")
 elseif(MSVC)
     set(MMX_FLAG "/arch:MMX")
     set(SSE2_FLAG "/arch:SSE2")
@@ -81,5 +82,16 @@ int main()
   return 0;
 }" AVX2_FOUND)
 
+# Check AVX512F
+set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
+set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
+CHECK_CXX_SOURCE_RUNS("
+#include <immintrin.h>
+int main()
+{
+  __m512i a = _mm512_undefined_epi32();
+  return 0;
+}" AVX512F_FOUND)
+
 set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
-mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
+mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND)

@@ -1,7 +1,7 @@
 # Distributed Training with NCCL2
 
 We design a pattern that can enable training with `ParallelExecutor` and
-using [NCCL2](https://developer.nvidia.com/nccl) as it's collective
+use [NCCL2](https://developer.nvidia.com/nccl) as its collective
 communication library.
 
 In `ParallelExecutor` we can use `AllReduce` or `Reduce` and `Broadcast`
@@ -9,14 +9,14 @@ to do multi GPU training. And if we initialize NCCL2 communicators as
 ranks in a distributed environment, we can simply run the `ParallelExecutor`
 as a distributed program! The only thing that may be different than in
 the single node version is that we need to broadcast the NCCL unique ID
-to all the nodes, and initialize communicators using that ID, so NCCL2
-will know each other as ranks.
+to all the nodes and initialize communicators using that ID, so NCCL2
+can know each other as ranks.
 
 To achieve this feature, we introduce a new operator: `gen_nccl_id` op,
 so we are ***not*** "bound to" running NCCL2 with MPI; we can run it on
-what ever platform you like.
+whatever platform you like.
 
-It have two running modes:
+It has two running modes:
 
 1. Generate and broadcast mode, which should be used on trainer 0;
 1. Listen and fetch mode, which should be used on trainers other than 0.
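For orientation, the handshake these two modes implement corresponds to the raw NCCL2 bootstrap sequence sketched below. The sketch is hedged: `broadcast_unique_id` and `fetch_unique_id` are hypothetical transport helpers standing in for whatever channel actually carries the ID (in Paddle, that channel is what the `gen_nccl_id` op provides).

```cpp
// Minimal sketch of the NCCL2 bootstrap that gen_nccl_id automates.
// broadcast_unique_id/fetch_unique_id are hypothetical helpers, not Paddle APIs.
#include <nccl.h>

extern void broadcast_unique_id(const ncclUniqueId* id);  // trainer 0 -> others
extern void fetch_unique_id(ncclUniqueId* id);            // others <- trainer 0

ncclComm_t init_nccl_rank(int global_rank, int nranks, bool is_trainer0) {
  ncclUniqueId id;
  if (is_trainer0) {
    ncclGetUniqueId(&id);      // "generate and broadcast" mode
    broadcast_unique_id(&id);
  } else {
    fetch_unique_id(&id);      // "listen and fetch" mode
  }
  ncclComm_t comm;
  // Every GPU across all nodes joins the same communicator under its global rank.
  ncclCommInitRank(&comm, nranks, id, global_rank);
  return comm;
}
```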
@@ -29,7 +29,7 @@ initialize NCCL communicator objects.
 <img src="src/ncc2_design.png">
 
 The above figure indicates the general process when training with NCCL2
-distributed. Each trainer have the number of communicators equal to the
+distributed. Each trainer has the number of communicators equal to the
 number of GPUs, but the ranks should match the global ranks number: here
 we have 8 GPUs in total, so `nranks==8`; for each trainer, the ranks should
 be from 0 ~ 3 on trainer 0 and 4 ~ 7 on trainer 1.

@@ -28,7 +28,7 @@ def get_symbol(num_classes=10, **kwargs):
 
-Variable here is actually a Symbol. Every basic Symbol corresponds to one Node, and every Node has its own NodeAttr. There is an op field in the NodeAttr class; when a Symbol represents a Variable (often input data), the op field is null.
+Variable here is actually a Symbol. Every basic Symbol corresponds to one Node, and every Node has its own AnyAttr. There is an op field in the AnyAttr class; when a Symbol represents a Variable (often input data), the op field is null.
 
 Symbol contains a data member, std::vector<NodeEntry> outputs, and NodeEntry contains a pointer to Node. We can follow the Node pointers to recover the whole Graph.
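To make the structure concrete, the relationships described above can be sketched in C++ roughly as follows (field names follow NNVM conventions; treat this as an illustration reconstructed from the text, not the verbatim headers):

```cpp
// Sketch of the Symbol -> NodeEntry -> Node relationship described above.
#include <cstdint>
#include <memory>
#include <vector>

struct Node;
using NodePtr = std::shared_ptr<Node>;

struct NodeEntry {
  NodePtr node;    // the pointer into the graph mentioned in the text
  uint32_t index;  // which output of that node this entry refers to
};

struct Node {
  const void* op = nullptr;       // stand-in for the attribute's op field;
                                  // null when the node is a plain Variable
  std::vector<NodeEntry> inputs;  // following these recovers the whole Graph
};

struct Symbol {
  std::vector<NodeEntry> outputs;  // the data member named in the text
};
```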

@@ -36,19 +36,19 @@
 <tbody>
 <tr>
 <td>OpProtoMake definition </td>
-<td>`.cc` file; a backward Op does not need to define an OpProtoMake </td>
+<td>.cc file; a backward Op does not need to define an OpProtoMake </td>
 </tr>
 <tr>
 <td>Op definition </td>
-<td>`.cc` file</td>
+<td>.cc file</td>
 </tr>
 <tr>
 <td>Kernel implementation </td>
-<td>A kernel shared by CPU and CUDA lives in a `.h` file; otherwise the CPU implementation goes in a `.cc` file and the CUDA implementation in a `.cu` file.</td>
+<td>A kernel shared by CPU and CUDA lives in a .h file; otherwise the CPU implementation goes in a .cc file and the CUDA implementation in a .cu file.</td>
 </tr>
 <tr>
 <td>Op registration </td>
-<td>Op registration goes in the `.cc` file; kernel registration for CPU goes in the `.cc` file and for CUDA in the `.cu` file</td>
+<td>Op registration goes in the .cc file; kernel registration for CPU goes in the .cc file and for CUDA in the .cu file</td>
 </tr>
 </tbody>
 </table>
@@ -119,10 +119,29 @@ $$Out = scale*X$$
 
 In this example, `AddAttr<AttrType>("scale", "...").SetDefault(1.0);` adds the `scale` coefficient as an attribute and sets its default value to 1.0.
 
+### Defining the GradProtoMaker class
+Every Op must have a corresponding GradProtoMaker. If no custom GradProtoMaker is given for the forward Op, fluid registers DefaultGradProtoMaker, which uses all inputs and outputs (Input, Output, Output@Grad, and so on); pulling in variables that are not actually needed wastes GPU memory.
+The following example defines the GradProtoMaker for ScaleOp.
+```cpp
+class ScaleGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto *grad_op = new framework::OpDesc();
+    grad_op->SetType("scale");
+    grad_op->SetInput("X", OutputGrad("Out"));
+    grad_op->SetOutput("Out", InputGrad("X"));
+    grad_op->SetAttr("scale", GetAttr("scale"));
+    return std::unique_ptr<framework::OpDesc>(grad_op);
+  }
+};
+```
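A hedged aside: a maker like this only takes effect once it is passed at registration time. In the op's `.cc` file the wiring typically looks like the one-liner below (a sketch, assuming the usual `ops` namespace alias for `paddle::operators`):

```cpp
// Sketch: register the forward op together with its grad-op maker.
REGISTER_OPERATOR(scale, ops::ScaleOp, ops::ScaleOpMaker, ops::ScaleGradMaker);
```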
 ### Defining the Operator class
 
-The points below implement the definition of MulOp:
+The following implements the definition of MulOp:
 
 ```cpp
 class MulOp : public framework::OperatorWithKernel {
@@ -334,3 +353,83 @@ ctest -R test_mul_op
 - The type name used when registering an Op must match the Op's name. That is, registering `REGISTER_OPERATOR(B, ...)` inside `A_op.cc` is not allowed and will break the unit tests.
 - If an Op has no CUDA kernel, do not create an empty `*_op.cu`; this will break the unit tests.
 - If several Ops depend on shared helper functions, put those in a file whose name does not match the `*_op.*` pattern, e.g. `gather.h`.
+### Notes on using PADDLE_ENFORCE
+
+When implementing an Op, validate the input data with PADDLE_ENFORCE, PADDLE_ENFORCE_EQ, and related macros. The basic forms are:
+
+```
+PADDLE_ENFORCE(expression, error message)
+PADDLE_ENFORCE_EQ(object A, object B, error message)
+```
+
+If the expression is true, or A equals B, the check passes; otherwise the program aborts and the error message is reported back to the user.
+To keep these messages friendly and easy to understand, developers need to pay attention to how they write them.
+
+#### General principle
+
+Every place that uses a PADDLE_ENFORCE or PADDLE_ENFORCE_** check must come with an appropriately detailed explanation: the **error message must not be empty!**
+
+#### Standards for writing the message
+
+1. [required] What went wrong? Why did it go wrong?
+    - e.g. `ValueError: Mismatched label shape`
+2. [optional] What input was expected, and what was actually received?
+    - e.g. `Expected labels dimension=1. Received 4.`
+3. [optional] Can you suggest a fix?
+    - e.g. `Suggested Fix: If your classifier expects one-hot encoding label, check your n_classes argument to the estimator and/or the shape of your label. Otherwise, check the shape of your label.`
+
+If some of these points are unnecessary, or a brief description already makes them clear, write whatever fits the situation.
+
+##### FAQ: typical problems
+
+1. No error message, or a message too terse to give the user any useful hint (rewritten versions of these examples follow after this list).
+
+    Bad example 1: no message at all
+    ```
+    PADDLE_ENFORCE(ctx->HasInput("X"), "");
+    ```
+    Bad example 2: message too terse
+    ```
+    PADDLE_ENFORCE(i != nullptr, "i must be set");  // what is i?
+    ```
+
+2. Using developer-defined variable abbreviations in the message, which users cannot be expected to understand.
+
+    Bad example:
+    ```
+    PADDLE_ENFORCE(forward_pd != nullptr,
+                   "Fail to find eltwise_fwd_pd in device context");  // users may not know what eltwise_fwd_pd is
+    ```
+
+3. Calling an illegal interface inside an Op: Output = ShareDataWith(Input)
+
+    Bad example:
+    ```cpp
+    auto *out = ctx.Output<framework::LoDTensor>("Out");
+    auto *in = ctx.Input<framework::LoDTensor>("X");
+    out->ShareDataWith(*in);
+    ```
+    If Output = ShareDataWith(Input) appears inside an Op, it effectively adds a hidden edge between Input and Output in the operator graph. This edge cannot be expressed by graph analysis and breaks graph-based optimizations.
+
+4. Performance practice for Op implementations: calling Eigen's broadcast, chop, and similar operations can be several times slower than a handwritten CUDA kernel. In that case the CPU implementation can still reuse Eigen, while the GPU implementation should be a handwritten CUDA kernel.
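Following the standards above, hedged rewrites of the two terse checks from FAQ item 1 might read as follows (the op and input names here are invented for illustration):

```cpp
// Rewritten version of: PADDLE_ENFORCE(ctx->HasInput("X"), "");
PADDLE_ENFORCE(ctx->HasInput("X"),
               "Input(X) of MulOp operator should not be null.");

// Rewritten version of: PADDLE_ENFORCE(i != nullptr, "i must be set");
PADDLE_ENFORCE(i != nullptr,
               "Input(Ids) of LookupTableOp should not be null. "
               "Expected an initialized ids tensor; received nullptr.");
```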
+#### Special notes on Op InferShape check messages
+
+- For input/output variable checks, follow this format uniformly:
+
+    `Input(variable name) of OP-name operator should not be null.`
+
+    Correct example:
+    ```
+    PADDLE_ENFORCE(ctx->HasInput("Input"),
+                   "Input(Input) of LSTMP operator should not be null.");
+    ```
+
+- Input/output checks of a backward Op must spell out the backward Op's name.
+
+    Correct example:
+    ```
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of LoDResetGrad operator should not be null.");
+    ```

@@ -50,6 +50,33 @@ pop-up box, choose the current release branch and click "Run Build" button. You
 
 * pypi does not allow overwriting an already uploaded version of a wheel package, even if you delete the
   old version. You must change the version number before uploading a new one.
 
+### Publish wheel Packages for MacOS
+
+You need to build the binary wheel package for MacOS before publishing. To
+make sure that the package can be used by many versions of MacOS
+(10.11, 10.12, 10.13) and different python installs (python.org, homebrew, etc.),
+you must build the package ***exactly*** following the steps below.
+
+Build steps:
+
+1. Install python from python.org downloads, and make sure it is the one currently
+   in use on your system.
+1. `export MACOSX_DEPLOYMENT_TARGET=10.11`; `10.11` is enough for recent versions.
+1. `git clone https://github.com/PaddlePaddle/Paddle.git && cd Paddle && mkdir build && cd build`
+1. `cmake -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_SYSTEM_BLAS=OFF ..`; make sure the output of the `cmake` command is using the correct python interpreter installed from python.org
+1. `make -j`
+1. `pip install delocate`
+1. `mkdir fixed_wheel && delocate-wheel -w fixed_wheel python/dist/*.whl`
+
+Then the whl under `fixed_wheel` is ready to upload.
+
+Install steps:
+
+1. Run `pip install paddlepaddle...whl`
+1. Find the `libpython.dylib` that is currently in use:
+   - for python.org package installs, do nothing.
+   - for other python installs, find the path of `libpython*.dylib`, then `export LD_LIBRARY_PATH=your path && DYLD_LIBRARY_PATH=your path`
+
 ## Publish Docker Images
 
 Our CI tool will push latest images to DockerHub, so we only need to push a version tag like:

@@ -7,7 +7,7 @@
 The Eigen Tensor module provides strong support for element-wise computation, and one piece of code can run on both CPU and GPU. However, Eigen Tensor is a module still under development, so its tests may be incomplete and its documentation sparse.
 
-For a detailed introduction to the Eigen Tensor module, see [document 1](https://github.com/RLovelett/eigen/blob/master/unsupported/Eigen/CXX11/src/Tensor/README.md) and [document 2](https://bitbucket.org/eigen/eigen/src/default/unsupported/Eigen/CXX11/src/Tensor/README.md)
+For a detailed introduction to the Eigen Tensor module, see the [Eigen documentation](https://bitbucket.org/eigen/eigen/src/default/unsupported/Eigen/CXX11/src/Tensor/README.md)
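As a taste of what the module offers, a minimal element-wise example looks like the following (a CPU-only sketch; on GPU the same expression is evaluated by assigning through a device, which is exactly the write-once property the text refers to):

```cpp
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> a(2, 3), b(2, 3);
  a.setConstant(1.0f);
  b.setConstant(2.0f);

  // One element-wise expression; Eigen evaluates it in a single fused pass.
  Eigen::Tensor<float, 2> c = a + b * 0.5f;

  return c(0, 0) == 2.0f ? 0 : 1;  // 1.0 + 2.0 * 0.5 == 2.0
}
```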
 ## paddle::framework::Tensor

@@ -1,12 +1,12 @@
 # Distributed Training with NCCL2 and RDMA
 
-When doing distributed multi-GPU training, network bandwith often becomes the
-bottle neck. We introduce a way to use NCCL2 to do such training job to
-achieve best performace.
+When doing distributed multi-GPU training, network bandwidth often becomes the
+bottleneck. We introduce a way to use NCCL2 to do such a training job to
+achieve the best performance.
 
-## Prepare Hardwares with RDMA and Multiple GPUs
+## Prepare Hardware with RDMA and Multiple GPUs
 
-I'm using two Linux servers each of them is installed with 8 GPUs and
+I'm using two Linux servers, each of them installed with 8 GPUs and
 one 100Gb RDMA card.
 
 Base environment is:
@@ -25,7 +25,7 @@ In general, the steps including:
 1. Use docker to run tests and make sure GPUs and RDMA can work inside
    the container.
 
-I'll ommit section "Install GPU drivers" because we can find it easily
+I'll omit the section "Install GPU drivers" because we can find it easily
 somewhere else.
 
 ### Install RDMA drivers
@@ -33,7 +33,7 @@ somewhere else.
 For my case, I've got two machines with device
 "Mellanox Technologies MT27700 Family [ConnectX-4]" installed. The OS was
 "CentOS 7.4" and I updated the kernel to version 4.4 so that docker can
-work with latest overlay2 filesystem.
+work with the latest overlay2 filesystem.
 
 ***NOTE: before you start, make sure you have a way to get a console
 of the server other than ssh because we may need to re-configure the
@@ -45,14 +45,14 @@ network device.***
 1. Run `./mlnxofedinstall --add-kernel-support` in the software package.
 1. Run `/etc/init.d/openibd restart` to make everything work, note that
    this operation may cause the network to go down if you are using this
-   RDMA device as default network device and use ssh to login the server.
+   RDMA device as the default network device and use ssh to log in to the server.
 1. Re-configure the network interface, for example:
    `ifconfig eth2 192.168.16.30/20 up`, then add routes if needed:
    `ip route add default via 192.168.16.1 dev eth2`.
 1. Do the same thing on the other node.
 1. Use `ping` to test if the two nodes have typical ICMP connection.
 1. Use either `udaddy` or `ib_write_bw` to test the network connection is
-   ready and have the desired bandwith.
+   ready and has the desired bandwidth.
### Prepare Docker Image to Run RDMA Programs ### Prepare Docker Image to Run RDMA Programs
@@ -60,7 +60,7 @@ network device.***
    package in it.
 1. Start a docker container and mount GPU driver libs into it (you can
    skip this step if you are using nvidia-docker).
-1. Mount RDMA dirvers and libs into the docker image (see below section),
+1. Mount RDMA drivers and libs into the docker image (see the section below),
    also `udaddy` and `ib_write_bw` if needed.
 1. Mount GPU devices and RDMA devices into the container using `--device`
    or just use privileged mode `--privileged`.

@@ -4,13 +4,11 @@ Paddle inference API
 
 To make inference deployment simpler and more convenient, Fluid provides a set
 of high-level APIs that hide the different underlying optimized implementations.
 
-`Inference-related code <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/contrib/inference>`__
+`Inference-related code <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/inference/api>`__
 includes:
 
 - the header ``paddle_inference_api.h``, which defines all the interfaces
 - the library ``libpaddle_fluid.so`` or ``libpaddle_fluid.a``
-- the library ``libpaddle_inference_api.so`` or
-  ``libpaddle_inference_api.a``
 
 For build instructions and dependencies, see :ref:`install_or_build_cpp_inference_lib`.
@@ -97,12 +95,11 @@ engine
 
     CHECK(predictor->Run(slots, &outputs));
 
     // fetch outputs ...
 
-At build time, link ``libpaddle_fluid.a/.so`` together with
-``libpaddle_inference_api.a/.so``.
+At build time, linking ``libpaddle_fluid.a/.so`` is enough.
 
 Example code
 ------------
 
-- `inference demos <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/contrib/inference/demo>`__
-- `a more involved single-thread/multi-thread example <https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/contrib/inference/test_paddle_inference_api_impl.cc>`__
+- `inference demos <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/inference/api/demo_ci>`__
+- `a more involved single-thread/multi-thread example <https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/api/api_impl_tester.cc>`__
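As a companion to the ``predictor->Run`` snippet above, here is a minimal end-to-end usage sketch. It is hedged: the model directory is hypothetical, and the exact ``NativeConfig`` fields should be verified against ``paddle_inference_api.h``::

    #include <vector>
    #include <glog/logging.h>
    #include "paddle_inference_api.h"

    int main() {
      paddle::NativeConfig config;
      config.model_dir = "./mobilenet";  // hypothetical model directory
      config.use_gpu = false;

      auto predictor =
          paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);

      std::vector<paddle::PaddleTensor> slots, outputs;
      // ... fill slots with input tensors ...
      CHECK(predictor->Run(slots, &outputs));
      // ... read results from outputs ...
      return 0;
    }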


@@ -2,9 +2,13 @@ add_subdirectory(memory)
 add_subdirectory(platform)
 add_subdirectory(framework)
 add_subdirectory(operators)
-add_subdirectory(pybind)
 add_subdirectory(string)
+
+if (NOT WIN32)
+add_subdirectory(pybind)
 add_subdirectory(recordio)
+endif(NOT WIN32)
+
 if(WITH_INFERENCE)
   # NOTE: please add subdirectory inference at last.
   add_subdirectory(inference)

@@ -1,5 +1,7 @@
-add_subdirectory(details)
 add_subdirectory(ir)
+if (NOT WIN32)
+add_subdirectory(details)
+endif (NOT WIN32)
 # ddim lib
 proto_library(framework_proto SRCS framework.proto)
@@ -28,8 +30,12 @@ if(WITH_GPU)
 else()
   cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor)
 endif()
+if (NOT WIN32)
 cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio)
+else()
+cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto)
+endif (NOT WIN32)
 cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
 nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
@@ -69,14 +75,22 @@ cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute
 cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
 cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
 cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
+if (NOT WIN32)
 cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
     shape_inference data_transform lod_tensor profiler)
+else()
+cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
+    shape_inference data_transform lod_tensor)
+endif(NOT WIN32)
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
 
 cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog)
 cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
 nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
 
+if (NOT WIN32)
 py_proto_compile(framework_py_proto SRCS framework.proto)
 # Generate an empty __init__.py to make framework_py_proto as a valid python module.
 add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
@@ -86,6 +100,7 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
   COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/
   COMMENT "Copy generated python proto into directory paddle/fluid/proto."
   WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+endif(NOT WIN32)
 
 cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
@@ -99,8 +114,13 @@ else()
   cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method)
 endif()
 
+if (NOT WIN32)
-cc_library(parallel_executor SRCS parallel_executor.cc DEPS threaded_ssa_graph_executor scope_buffered_ssa_graph_executor graph graph_viz_pass multi_devices_graph_pass multi_devices_graph_print_pass multi_devices_graph_check_pass)
+cc_library(parallel_executor SRCS parallel_executor.cc DEPS
+        threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
+        graph graph_viz_pass multi_devices_graph_pass
+        multi_devices_graph_print_pass multi_devices_graph_check_pass
+        fast_threaded_ssa_graph_executor)
+endif() # NOT WIN32
 
 cc_library(prune SRCS prune.cc DEPS framework_proto)
 cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
@@ -115,6 +135,10 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
 # cc_test(channel_test SRCS channel_test.cc)
 cc_test(tuple_test SRCS tuple_test.cc )
 
+if (NOT WIN32)
+cc_test(rw_lock_test SRCS rw_lock_test.cc)
+endif (NOT WIN32)
+
 # disable test temporarily.
 # TODO https://github.com/PaddlePaddle/Paddle/issues/11971
 # cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op

