Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into shufflechannel

inference-pre-release-gpu
shippingwang 6 years ago
commit 14f2a1060d

@ -55,6 +55,7 @@ option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF)
option(WITH_JEMALLOC "Compile PaddlePaddle with jemalloc" OFF)
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
@ -261,6 +262,12 @@ if (WITH_PROFILER)
add_definitions(-DWITH_GPERFTOOLS)
endif()
if (WITH_JEMALLOC)
find_package(JeMalloc REQUIRED)
include_directories(${JEMALLOC_INCLUDE_DIR})
add_definitions(-DWITH_JEMALLOC)
endif()
include(generic) # simplify cmake module
include(package) # set paddle packages
include(ccache) # set ccache for compilation
@ -290,7 +297,7 @@ if(WITH_PSLIB)
list(APPEND EXTERNAL_LIBS pslib_brpc)
list(APPEND EXTERNAL_LIBS libmct)
endif(WITH_PSLIB)
if(WITH_AMD_GPU)
find_package(HIP)
include(hip)

@ -128,7 +128,7 @@ Please install pre-commit, which automatically reformat the changes to C/C++ and
Please remember to add related unit tests.
- For C/C++ code, please follow [`google-test` Primer](https://github.com/google/googletest/blob/master/googletest/docs/Primer.md).
- For C/C++ code, please follow [`google-test` Primer](https://github.com/google/googletest/blob/master/googletest/docs/primer.md) .
- For Python code, please use [Python's standard `unittest` package](http://pythontesting.net/framework/unittest/unittest-introduction/).

@ -94,52 +94,52 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# specify sphinx version as 1.5.6 and remove -U option for [pip install -U
# sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
# version(1.7.1 for now), which causes building documentation failed.
RUN pip3 install -U wheel && \
pip3 install -U docopt PyYAML sphinx==1.5.6 && \
pip3 install sphinx-rtd-theme==0.1.9 recommonmark && \
pip3.6 install -U wheel && \
pip3.6 install -U docopt PyYAML sphinx==1.5.6 && \
pip3.6 install sphinx-rtd-theme==0.1.9 recommonmark && \
pip3.7 install -U wheel && \
pip3.7 install -U docopt PyYAML sphinx==1.5.6 && \
pip3.7 install sphinx-rtd-theme==0.1.9 recommonmark && \
RUN pip3 --no-cache-dir install -U wheel && \
pip3 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
pip3 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
pip3.6 --no-cache-dir install -U wheel && \
pip3.6 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
pip3.6 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
pip3.7 --no-cache-dir install -U wheel && \
pip3.7 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
pip3.7 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
easy_install -U pip && \
pip install -U pip setuptools wheel && \
pip install -U docopt PyYAML sphinx==1.5.6 && \
pip install sphinx-rtd-theme==0.1.9 recommonmark
RUN pip3 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
pip3 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip3 install opencv-python && \
pip3.6 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
pip3.6 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip3.6 install opencv-python && \
pip3.7 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
pip3.7 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip3.7 install opencv-python && \
pip install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip install opencv-python
pip --no-cache-dir install -U pip setuptools wheel && \
pip --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
pip --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark
RUN pip3 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
pip3 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip3 --no-cache-dir install opencv-python && \
pip3.6 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
pip3.6 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip3.6 --no-cache-dir install opencv-python && \
pip3.7 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
pip3.7 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip3.7 --no-cache-dir install opencv-python && \
pip --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
pip --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip --no-cache-dir install opencv-python
#For docstring checker
RUN pip3 install pylint pytest astroid isort
RUN pip3.6 install pylint pytest astroid isort
RUN pip3.7 install pylint pytest astroid isort
RUN pip install pylint pytest astroid isort LinkChecker
RUN pip3 --no-cache-dir install pylint pytest astroid isort
RUN pip3.6 --no-cache-dir install pylint pytest astroid isort
RUN pip3.7 --no-cache-dir install pylint pytest astroid isort
RUN pip --no-cache-dir install pylint pytest astroid isort LinkChecker
COPY ./python/requirements.txt /root/
RUN pip3 install -r /root/requirements.txt
RUN pip3.6 install -r /root/requirements.txt
RUN pip3.7 install -r /root/requirements.txt
RUN pip install -r /root/requirements.txt
RUN pip3 --no-cache-dir install -r /root/requirements.txt
RUN pip3.6 --no-cache-dir install -r /root/requirements.txt
RUN pip3.7 --no-cache-dir install -r /root/requirements.txt
RUN pip --no-cache-dir install -r /root/requirements.txt
# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
RUN apt-get install -y libssl-dev libffi-dev
RUN pip3 install certifi urllib3[secure]
RUN pip3.6 install certifi urllib3[secure]
RUN pip3.7 install certifi urllib3[secure]
RUN pip install certifi urllib3[secure]
RUN apt-get install -y libssl-dev libffi-dev && apt-get clean -y
RUN pip3 --no-cache-dir install certifi urllib3[secure]
RUN pip3.6 --no-cache-dir install certifi urllib3[secure]
RUN pip3.7 --no-cache-dir install certifi urllib3[secure]
RUN pip --no-cache-dir install certifi urllib3[secure]
# Install woboq_codebrowser to /woboq
@ -149,6 +149,14 @@ RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \
-DCMAKE_BUILD_TYPE=Release . \
make)
# ar mishandles 4GB files
# https://sourceware.org/bugzilla/show_bug.cgi?id=14625
# remove them when apt-get support 2.27 and higher version
RUN wget -q https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/binutils/2.27-9ubuntu1/binutils_2.27.orig.tar.gz && \
tar -xzf binutils_2.27.orig.tar.gz && \
cd binutils-2.27 && \
./configure && make -j && make install && cd .. && rm -rf binutils-2.27 binutils_2.27.orig.tar.gz
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
RUN mkdir /var/run/sshd
RUN echo 'root:root' | chpasswd

@ -0,0 +1,28 @@
# - Find JeMalloc library
# Find the native JeMalloc includes and library
#
# JEMALLOC_INCLUDE_DIR - where to find jemalloc.h, etc.
# JEMALLOC_LIBRARIES - List of libraries when using jemalloc.
# JEMALLOC_FOUND - True if jemalloc found.
find_path(JEMALLOC_INCLUDE_DIR
NAMES jemalloc/jemalloc.h
HINTS ${JEMALLOC_ROOT_DIR}/include)
find_library(JEMALLOC_LIBRARIES
NAMES jemalloc
HINTS ${JEMALLOC_ROOT_DIR}/lib)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(jemalloc DEFAULT_MSG JEMALLOC_LIBRARIES JEMALLOC_INCLUDE_DIR)
mark_as_advanced(
JEMALLOC_LIBRARIES
JEMALLOC_INCLUDE_DIR)
if (JEMALLOC_FOUND)
add_library(jemalloc::jemalloc UNKNOWN IMPORTED)
set_target_properties(jemalloc::jemalloc PROPERTIES
IMPORTED_LOCATION ${JEMALLOC_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES "${JEMALLOC_INCLUDE_DIR}")
endif()

@ -134,6 +134,7 @@ if(WITH_GPU)
message(WARNING "Anakin needs CUDNN >= 7.0 to compile. Force WITH_ANAKIN=OFF")
set(WITH_ANAKIN OFF CACHE STRING "Anakin is valid only when CUDNN >= 7.0." FORCE)
endif()
add_definitions(-DWITH_ANAKIN)
endif()
if(WITH_ANAKIN)
# NOTICE(minqiyang): the end slash is important because $CUDNN_INCLUDE_DIR

@ -5,6 +5,8 @@ endif()
set(paddle_known_gpu_archs "30 35 50 52 60 61 70")
set(paddle_known_gpu_archs7 "30 35 50 52")
set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75")
######################################################################################
# A function for automatic detection of GPUs installed (if autodetection is enabled)
@ -59,7 +61,7 @@ endfunction()
# select_nvcc_arch_flags(out_variable)
function(select_nvcc_arch_flags out_variable)
# List of arch names
set(archs_names "Kepler" "Maxwell" "Pascal" "All" "Manual")
set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "All" "Manual")
set(archs_name_default "All")
if(NOT CMAKE_CROSSCOMPILING)
list(APPEND archs_names "Auto")
@ -93,6 +95,8 @@ function(select_nvcc_arch_flags out_variable)
set(cuda_arch_bin "60 61")
elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
set(cuda_arch_bin "70")
elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
set(cuda_arch_bin "75")
elseif(${CUDA_ARCH_NAME} STREQUAL "All")
set(cuda_arch_bin ${paddle_known_gpu_archs})
elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
@ -139,10 +143,12 @@ endfunction()
message(STATUS "CUDA detected: " ${CUDA_VERSION})
if (${CUDA_VERSION} LESS 7.0)
set(paddle_known_gpu_archs ${paddle_known_gpu_archs})
add_definitions("-DPADDLE_CUDA_BINVER=\"60\"")
elseif (${CUDA_VERSION} LESS 8.0) # CUDA 7.x
set(paddle_known_gpu_archs ${paddle_known_gpu_archs7})
list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
add_definitions("-DPADDLE_CUDA_BINVER=\"70\"")
elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
set(paddle_known_gpu_archs ${paddle_known_gpu_archs8})
list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
@ -150,6 +156,17 @@ elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
# CUDA 8 may complain that sm_20 is no longer supported. Suppress the
# warning for now.
list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets")
add_definitions("-DPADDLE_CUDA_BINVER=\"80\"")
elseif (${CUDA_VERSION} LESS 10.0) # CUDA 9.x
set(paddle_known_gpu_archs ${paddle_known_gpu_archs9})
list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
add_definitions("-DPADDLE_CUDA_BINVER=\"90\"")
elseif (${CUDA_VERSION} LESS 11.0) # CUDA 10.x
set(paddle_known_gpu_archs ${paddle_known_gpu_archs10})
list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
add_definitions("-DPADDLE_CUDA_BINVER=\"100\"")
endif()
include_directories(${CUDA_INCLUDE_DIRS})

@ -89,6 +89,7 @@ if(CUDNN_FOUND)
if(NOT CUDNN_MAJOR_VERSION)
set(CUDNN_VERSION "???")
else()
add_definitions("-DPADDLE_CUDNN_BINVER=\"${CUDNN_MAJOR_VERSION}\"")
math(EXPR CUDNN_VERSION
"${CUDNN_MAJOR_VERSION} * 1000 +
${CUDNN_MINOR_VERSION} * 100 + ${CUDNN_PATCHLEVEL_VERSION}")

@ -23,11 +23,8 @@ set(BOOST_PROJECT "extern_boost")
# checked that the devtools package of CentOS 6 installs boost 1.41.0.
# So we use 1.41.0 here.
set(BOOST_VER "1.41.0")
if((NOT DEFINED BOOST_TAR) OR (NOT DEFINED BOOST_URL))
message(STATUS "use pre defined download url")
set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE)
set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
endif()
set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE)
set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")

@ -32,4 +32,4 @@ endif()
add_dependencies(cub extern_cub)
LIST(APPEND externl_project_dependencies cub)
LIST(APPEND external_project_dependencies cub)

@ -28,4 +28,4 @@ endif()
add_dependencies(dlpack extern_dlpack)
LIST(APPEND externl_project_dependencies dlpack)
LIST(APPEND external_project_dependencies dlpack)

@ -63,6 +63,15 @@ ADD_DEPENDENCIES(gflags extern_gflags)
LIST(APPEND external_project_dependencies gflags)
# On Windows (including MinGW), the Shlwapi library is used by gflags if available.
if (WIN32)
include(CheckIncludeFileCXX)
check_include_file_cxx("shlwapi.h" HAVE_SHLWAPI)
if (HAVE_SHLWAPI)
set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib)
endif(HAVE_SHLWAPI)
endif (WIN32)
IF(WITH_C_API)
INSTALL(DIRECTORY ${GFLAGS_INCLUDE_DIR} DESTINATION third_party/gflags)
IF(ANDROID)

@ -55,7 +55,7 @@ ExternalProject_Add(
${MKLDNN_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS ${MKLDNN_DEPENDS}
GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
GIT_REPOSITORY "https://github.com/intel/mkl-dnn.git"
GIT_TAG "830a10059a018cd2634d94195140cf2d8790a75a"
PREFIX ${MKLDNN_SOURCES_DIR}
UPDATE_COMMAND ""
@ -106,10 +106,10 @@ else(WIN32)
SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0)
ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB}
COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB}
DEPENDS mkldnn)
DEPENDS mkldnn shared_mkldnn)
endif(WIN32)
ADD_CUSTOM_TARGET(mkldnn_shared_lib ALL DEPENDS ${MKLDNN_SHARED_LIB})
ADD_DEPENDENCIES(mkldnn_shared_lib ${MKLDNN_PROJECT} mkldnn)
IF(WITH_C_API)
INSTALL(FILES ${MKLDNN_SHARED_LIB} DESTINATION lib)
ENDIF()

@ -17,10 +17,8 @@ IF(NOT ${WITH_MKLML})
ENDIF(NOT ${WITH_MKLML})
IF(APPLE)
MESSAGE(WARNING
"Mac is not supported with MKLML in Paddle yet."
"Force WITH_MKLML=OFF")
SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE)
MESSAGE(WARNING "Mac is not supported with MKLML in Paddle yet. Force WITH_MKLML=OFF.")
SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in MacOS" FORCE)
return()
ENDIF()
@ -31,29 +29,24 @@ SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR})
SET(MKLML_ROOT ${MKLML_INSTALL_DIR})
SET(MKLML_INC_DIR ${MKLML_ROOT}/include)
SET(MKLML_LIB_DIR ${MKLML_ROOT}/lib)
if(WIN32)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
SET(TIME_VERSION "2019.0.1.20181227")
IF(WIN32)
SET(MKLML_VER "mklml_win_${TIME_VERSION}" CACHE STRING "" FORCE)
SET(MKLML_URL "https://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.zip" CACHE STRING "" FORCE)
SET(MKLML_LIB ${MKLML_LIB_DIR}/mklml.lib)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll)
SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll)
else()
ELSE()
SET(MKLML_VER "mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
endif()
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
IF((NOT DEFINED MKLML_VER) OR (NOT DEFINED MKLML_URL))
MESSAGE(STATUS "use pre defined download url")
if(WIN32)
SET(MKLML_VER "mklml_win_2019.0.20180710" CACHE STRING "" FORCE)
SET(MKLML_URL "https://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.zip" CACHE STRING "" FORCE)
else()
SET(MKLML_VER "mklml_lnx_2019.0.20180710" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
ENDIF()
endif()
ENDIF()
SET(MKLML_PROJECT "extern_mklml")
MESSAGE(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}")

@ -37,15 +37,18 @@ INCLUDE(GNUInstallDirs)
INCLUDE(ExternalProject)
SET(NGRAPH_PROJECT "extern_ngraph")
SET(NGRAPH_VERSION "0.9")
SET(NGRAPH_GIT_TAG "f9fd9d4cc318dc59dd4b68448e7fbb5f67a28bd0")
SET(NGRAPH_GIT_TAG "20bd8bbc79ae3a81c57313846a2be7313e5d1dab")
SET(NGRAPH_SOURCES_DIR ${THIRD_PARTY_PATH}/ngraph)
SET(NGRAPH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/ngraph)
SET(NGRAPH_INC_DIR ${NGRAPH_INSTALL_DIR}/include)
SET(NGRAPH_LIB_DIR ${NGRAPH_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR})
SET(NGRAPH_SHARED_LIB_NAME libngraph.so.${NGRAPH_VERSION})
SET(NGRAPH_SHARED_LIB_NAME libngraph.so)
SET(NGRAPH_CPU_LIB_NAME libcpu_backend.so)
SET(NGRAPH_TBB_LIB_NAME libtbb.so.2)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
SET(NGRAPH_TBB_LIB_NAME libtbb_debug.so.2)
else()
SET(NGRAPH_TBB_LIB_NAME libtbb.so.2)
endif()
SET(NGRAPH_GIT_REPO "https://github.com/NervanaSystems/ngraph.git")
SET(NGRAPH_SHARED_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_SHARED_LIB_NAME})
SET(NGRAPH_CPU_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_CPU_LIB_NAME})
@ -67,16 +70,7 @@ ExternalProject_Add(
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
CMAKE_ARGS -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR}
CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/lib
)
# Workaround for nGraph expecting mklml to be in mkldnn install directory.
ExternalProject_Add_Step(
${NGRAPH_PROJECT}
PrepareMKL
COMMAND ${CMAKE_COMMAND} -E create_symlink ${MKLML_LIB} ${MKLDNN_INSTALL_DIR}/lib/libmklml_intel.so
COMMAND ${CMAKE_COMMAND} -E create_symlink ${MKLML_IOMP_LIB} ${MKLDNN_INSTALL_DIR}/lib/libiomp5.so
DEPENDEES download
DEPENDERS configure
CMAKE_ARGS -DMKLML_LIB_DIR=${MKLML_INSTALL_DIR}/lib
)
add_dependencies(ngraph ${NGRAPH_PROJECT})

@ -115,6 +115,10 @@ function(common_link TARGET_NAME)
if (WITH_PROFILER)
target_link_libraries(${TARGET_NAME} gperftools::profiler)
endif()
if (WITH_JEMALLOC)
target_link_libraries(${TARGET_NAME} jemalloc::jemalloc)
endif()
endfunction()
@ -228,7 +232,7 @@ function(merge_static_libs TARGET_NAME)
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
# msvc will put libarary in directory of "/Release/xxxlib" by default
# msvc will put libarary in directory of "/Release/xxxlib" by default
# COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND cmake -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}"
@ -355,6 +359,8 @@ function(cc_binary TARGET_NAME)
add_dependencies(${TARGET_NAME} ${cc_binary_DEPS})
common_link(${TARGET_NAME})
endif()
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${os_dependency_modules})
endfunction(cc_binary)
function(cc_test TARGET_NAME)
@ -363,18 +369,15 @@ function(cc_test TARGET_NAME)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
if(WIN32)
list(APPEND win32_deps shlwapi)
if("${cc_test_DEPS};" MATCHES "python;")
list(REMOVE_ITEM cc_test_DEPS python)
list(APPEND win32_deps ${PYTHON_LIBRARIES})
target_link_libraries(${TARGET_NAME} ${PYTHON_LIBRARIES})
endif()
endif(WIN32)
add_executable(${TARGET_NAME} ${cc_test_SRCS})
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
if(WIN32)
target_link_libraries(${TARGET_NAME} ${win32_deps})
endif(WIN32)
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${os_dependency_modules} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
common_link(${TARGET_NAME})
add_test(NAME ${TARGET_NAME}
@ -447,7 +450,8 @@ function(nv_test TARGET_NAME)
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog ${os_dependency_modules})
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME})
@ -534,7 +538,8 @@ function(hip_test TARGET_NAME)
endif()
add_executable(${TARGET_NAME} ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags)
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags ${os_dependency_modules})
add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags)
common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME})

@ -136,7 +136,7 @@ if (WITH_MKLDNN)
copy(mkldnn_lib
SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB}
DSTS ${dst_dir} ${dst_dir}/lib
DEPS mkldnn
DEPS mkldnn_shared_lib
)
endif ()

@ -110,7 +110,7 @@ function(op_library TARGET)
# Define operators that don't need pybind here.
foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op"
"tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
"fusion_transpose_flatten_concat_op")
"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op")
if ("${TARGET}" STREQUAL "${manual_pybind_op}")
set(pybind_flag 1)
endif()

@ -57,46 +57,43 @@ int main()
return 0;
}" SSE3_FOUND)
# disable AVX by default on windows
if(NOT WIN32)
# Check AVX
set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}" AVX_FOUND)
# Check AVX
set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}" AVX_FOUND)
# Check AVX 2
set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}" AVX2_FOUND)
# Check AVX 2
set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}" AVX2_FOUND)
# Check AVX512F
set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}" AVX512F_FOUND)
endif(NOT WIN32)
# Check AVX512F
set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}" AVX512F_FOUND)
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND)

@ -60,7 +60,7 @@ class Float16Transpiler:
raise TypeError("place should be as CPUPlace/CUDAPlace type")
if scope is None:
scope = global_scope()
if not isinstance(scope, core.Scope):
if not isinstance(scope, core._Scope):
raise TypeError("scope should be as Scope type or None")
self.scope = scope

@ -26,10 +26,6 @@ paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], vara
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.ExecutionStrategy) -> None
paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.GradientScaleStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.ReduceStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy) -> None
paddle.fluid.create_lod_tensor ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None)
paddle.fluid.create_random_int_lodtensor ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeedDesc.__init__ ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None)
@ -47,6 +43,12 @@ paddle.fluid.AsyncExecutor.init_worker ArgSpec(args=['self', 'dist_desc', 'start
paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False))
paddle.fluid.AsyncExecutor.save_model ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.AsyncExecutor.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.CompiledProgram.__init__ ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.CompiledProgram.with_data_parallel ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.ExecutionStrategy) -> None
paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.GradientScaleStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.ReduceStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy) -> None
paddle.fluid.io.save_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.io.save_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.save_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
@ -88,6 +90,7 @@ paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'poo
paddle.fluid.layers.adaptive_pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
paddle.fluid.layers.adaptive_pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False))
paddle.fluid.layers.data_norm ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'use_mkldnn', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, False, None, None, None, False))
paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
@ -211,6 +214,7 @@ paddle.fluid.layers.lstm ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', '
paddle.fluid.layers.shuffle_channel ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.py_func ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.psroi_pool ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.teacher_student_sigmoid_loss ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0))
paddle.fluid.layers.huber_loss ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
@ -406,28 +410,50 @@ paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None
paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0))
paddle.fluid.nets.img_conv_group ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True))
paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.optimizer.SGDOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.SGDOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
paddle.fluid.optimizer.MomentumOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.MomentumOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.MomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, None, None))
paddle.fluid.optimizer.AdagradOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdagradOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False))
paddle.fluid.optimizer.AdamOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdamOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdamOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdamaxOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None))
paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdamaxOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdamaxOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None))
paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.DecayedAdagradOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None))
paddle.fluid.optimizer.FtrlOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.FtrlOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.FtrlOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None))
paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.RMSPropOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.RMSPropOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None))
paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdadeltaOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None))
paddle.fluid.optimizer.ModelAverage.apply ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.optimizer.ModelAverage.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.ModelAverage.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.ModelAverage.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.ModelAverage.restore ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None))
paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.LarsMomentumOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.LarsMomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.backward.append_backward ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.regularizer.L1DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,))
@ -465,11 +491,7 @@ paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, ke
paddle.fluid.unique_name.guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.Scope.__init__ __init__(self: paddle.fluid.core.Scope) -> None
paddle.fluid.Scope.drop_kids drop_kids(self: paddle.fluid.core.Scope) -> None
paddle.fluid.Scope.find_var find_var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
paddle.fluid.Scope.new_scope new_scope(self: paddle.fluid.core.Scope) -> paddle.fluid.core.Scope
paddle.fluid.Scope.var var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
paddle.reader.map_readers ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None)
paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None)
paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None)

@ -7,27 +7,17 @@ function(windows_symbolic TARGET)
cmake_parse_arguments(windows_symbolic "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(final_path ${CMAKE_CURRENT_SOURCE_DIR}/${windows_symbolic_PATH})
foreach(src ${windows_symbolic_SRCS})
get_filename_component(src ${src} NAME_WE)
if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cu)
message(FATAL " ${src}.cc and ${src}.cu must exsits, and ${src}.cu must be symbolic file.")
endif()
#only copy the xx.cu to.xx.cu when the content are modified
set(copy_flag 1)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc SOURCE_STR)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu TARGET_STR)
if (SOURCE_STR STREQUAL TARGET_STR)
set(copy_flag 0)
endif()
endif()
if (copy_flag)
add_custom_command(OUTPUT .${src}.cu
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc" "${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu"
COMMENT "create hidden file of ${src}.cu")
endif(copy_flag)
add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
get_filename_component(src ${src} NAME_WE)
if (NOT EXISTS ${final_path}/${src}.cc OR NOT EXISTS ${final_path}/${src}.cu)
message(FATAL " ${src}.cc and ${src}.cu must exsits, and ${src}.cu must be symbolic file.")
endif()
file(GENERATE OUTPUT ${final_path}/.${src}.cu INPUT ${final_path}/${src}.cc)
add_custom_command(OUTPUT ${final_path}/.${src}.cu
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${final_path}/${src}.cc" "${final_path}/.${src}.cu"
COMMENT "create hidden file of ${src}.cu")
add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
endforeach()
endfunction()
@ -37,9 +27,10 @@ add_subdirectory(details)
proto_library(framework_proto SRCS framework.proto)
proto_library(async_executor_proto SRCS data_feed.proto)
cc_library(ddim SRCS ddim.cc DEPS eigen3 boost)
cc_library(ddim SRCS ddim.cc DEPS eigen3 boost enforce)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
cc_test(unroll_array_ops_test SRCS unroll_array_ops_test.cc)
cc_library(data_type SRCS data_type.cc DEPS framework_proto ddim device_context)
cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
if(WITH_GPU)
@ -78,17 +69,23 @@ cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memor
cc_library(reader SRCS reader.cc DEPS lod_tensor ddim)
cc_test(reader_test SRCS reader_test.cc DEPS reader)
cc_test(variable_test SRCS variable_test.cc)
cc_library(threadpool SRCS threadpool.cc DEPS enforce)
cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
cc_library(scope SRCS scope.cc DEPS glog threadpool)
cc_library(var_type_traits SRCS var_type_traits DEPS lod_tensor selected_rows framework_proto)
if (WITH_GPU)
target_link_libraries(var_type_traits dynload_cuda)
endif()
cc_test(var_type_traits_test SRCS var_type_traits_test.cc DEPS var_type_traits)
cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash var_type_traits)
cc_library(scope_pool SRCS scope_pool.cc DEPS scope)
cc_test(scope_test SRCS scope_test.cc DEPS scope)
cc_test(variable_test SRCS variable_test.cc DEPS tensor var_type_traits)
cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)
nv_test(data_device_transform_test SRCS data_device_transform_test.cu
DEPS operator op_registry device_context math_function)
DEPS operator op_registry device_context math_function scope)
if(WITH_GPU)
if (WIN32)
@ -133,11 +130,9 @@ cc_test(version_test SRCS version_test.cc DEPS version)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version)
if(WITH_NGRAPH)
if(NOT WIN32)
cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph)
cc_library(ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler ngraph)
endif(NOT WIN32)
cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph)
cc_library(ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler)
endif(WITH_NGRAPH)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
@ -179,11 +174,7 @@ if(WITH_DISTRIBUTE)
else()
if(WITH_NGRAPH)
if(NOT WIN32)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass ngraph ngraph_operator variable_helper)
else(NOT WIN32)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
endif(NOT WIN32)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass ngraph_operator variable_helper)
else(WITH_NGRAPH)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
endif(WITH_NGRAPH)
@ -193,14 +184,14 @@ endif()
target_link_libraries(executor garbage_collector)
cc_library(parallel_executor SRCS parallel_executor.cc DEPS
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor parallel_ssa_graph_executor
graph build_strategy
fast_threaded_ssa_graph_executor variable_helper)
if(WITH_PSLIB)
cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper pslib_brpc pslib)
cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper pslib_brpc pslib timer)
else()
cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper)
cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper timer)
endif(WITH_PSLIB)

@ -15,34 +15,123 @@
#pragma once
#include <cstdint>
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/fluid/framework/unroll_array_ops.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
template <typename T, size_t N>
class Array {
static_assert(N > 0, "The size of array must be larger than 0");
public:
HOSTDEVICE Array() {}
static constexpr size_t kSize = N;
HOSTDEVICE inline Array() {}
HOSTDEVICE explicit Array(const T &val) {
for (size_t i = 0; i < N; ++i) data_[i] = val;
template <typename... Args>
HOSTDEVICE inline explicit Array(const T &val, Args... args) {
static_assert(N == sizeof...(Args) + 1, "Invalid argument");
UnrollVarArgsAssign<T>::Run(data_, val, args...);
}
HOSTDEVICE const T *Get() const { return data_; }
HOSTDEVICE inline void Fill(const T &val) {
UnrollFillConstant<N>::Run(data_, val);
}
HOSTDEVICE T *GetMutable() { return data_; }
HOSTDEVICE inline const T *Get() const { return data_; }
HOSTDEVICE T &operator[](size_t index) { return data_[index]; }
HOSTDEVICE inline T *GetMutable() { return data_; }
HOSTDEVICE const T &operator[](size_t index) const { return data_[index]; }
HOSTDEVICE inline T &operator[](size_t i) { return *advance(data_, i); }
// Writing "return data_[i]" would cause compilation warning/error:
// "array subscript is above array bound" in Python 35 CI.
// It seems that it is a false warning of GCC if we do not check the bounds
// of array index. But for better performance, we do not check in operator[]
// like what is in STL. If users want to check the bounds, use at() instead
HOSTDEVICE inline const T &operator[](size_t i) const {
return *advance(data_, i);
}
HOSTDEVICE inline T &at(size_t i) {
#ifndef __CUDA_ARCH__
PADDLE_ENFORCE_LT(i, N, "Array index out of bounds");
#endif
return (*this)[i];
}
HOSTDEVICE inline const T &at(size_t i) const {
#ifndef __CUDA_ARCH__
PADDLE_ENFORCE_LT(i, N, "Array index out of bounds");
#endif
return (*this)[i];
}
HOSTDEVICE constexpr size_t size() const { return N; }
HOSTDEVICE inline bool operator==(const Array<T, N> &other) const {
return UnrollCompare<N>::Run(data_, other.data_);
}
HOSTDEVICE inline bool operator!=(const Array<T, N> &other) const {
return !(*this == other);
}
private:
template <typename U>
HOSTDEVICE static inline U *advance(U *ptr, size_t i) {
return ptr + i;
}
T data_[N];
};
template <typename T>
class Array<T, 0> {
public:
static constexpr size_t kSize = 0;
HOSTDEVICE inline Array() {}
HOSTDEVICE inline void Fill(const T &val) {}
HOSTDEVICE inline constexpr T *Get() const { return nullptr; }
// Add constexpr to GetMutable() cause warning in MAC
HOSTDEVICE inline T *GetMutable() { return nullptr; }
HOSTDEVICE inline T &operator[](size_t) {
#ifdef __CUDA_ARCH__
static T obj();
return obj;
#else
PADDLE_THROW("Array<T, 0> has no element");
#endif
}
HOSTDEVICE inline const T &operator[](size_t) const {
#ifdef __CUDA_ARCH__
static const T obj();
return obj;
#else
PADDLE_THROW("Array<T, 0> has no element");
#endif
}
HOSTDEVICE inline T &at(size_t i) { return (*this)[i]; }
HOSTDEVICE inline const T &at(size_t i) const { return (*this)[i]; }
HOSTDEVICE constexpr size_t size() const { return 0; }
HOSTDEVICE constexpr bool operator==(const Array<T, 0> &other) const {
return true;
}
HOSTDEVICE constexpr bool operator!=(const Array<T, 0> &other) const {
return false;
}
};
} // namespace framework
} // namespace paddle

@ -304,8 +304,13 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program,
// start executing ops in multiple threads
for (int thidx = 0; thidx < actual_thread_num; ++thidx) {
threads.push_back(
std::thread(&ExecutorThreadWorker::TrainFiles, workers[thidx].get()));
if (debug) {
threads.push_back(std::thread(&ExecutorThreadWorker::TrainFilesWithTimer,
workers[thidx].get()));
} else {
threads.push_back(
std::thread(&ExecutorThreadWorker::TrainFiles, workers[thidx].get()));
}
}
for (auto& th : threads) {

@ -165,7 +165,7 @@ template <typename T>
class GreaterThanChecker {
public:
explicit GreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
void operator()(T& value) const {
void operator()(const T& value) const {
PADDLE_ENFORCE(value > lower_bound_, "larger_than check fails.");
}
@ -177,7 +177,7 @@ template <typename T>
class EqualGreaterThanChecker {
public:
explicit EqualGreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
void operator()(T& value) const {
void operator()(const T& value) const {
PADDLE_ENFORCE_GE(value, lower_bound_, "equal_larger_than check fails.");
}
@ -193,7 +193,7 @@ class DefaultValueSetter {
public:
explicit DefaultValueSetter(T default_value)
: default_value_(default_value) {}
void operator()(T& value) const { value = default_value_; } // NOLINT
void operator()(T* value) const { *value = default_value_; }
private:
T default_value_;
@ -203,7 +203,7 @@ template <typename T>
class EnumInContainer {
public:
explicit EnumInContainer(const std::unordered_set<T>& c) : container_(c) {}
void operator()(T& val) const {
void operator()(const T& val) const {
PADDLE_ENFORCE(container_.find(val) != container_.end(),
"Value %s is not in enum container %s", val,
ContainerDebugString());
@ -232,7 +232,8 @@ class EnumInContainer {
// an attribute can have more than one limits
template <typename T>
class TypedAttrChecker {
typedef std::function<void(T&)> ValueChecker;
typedef std::function<void(T*)> DefaultValueChecker;
typedef std::function<void(const T&)> ValueChecker;
public:
explicit TypedAttrChecker(const std::string& attr_name)
@ -268,17 +269,17 @@ class TypedAttrChecker {
return *this;
}
void operator()(AttributeMap& attr_map) const { // NOLINT
if (!attr_map.count(attr_name_)) {
void operator()(AttributeMap* attr_map) const {
if (!attr_map->count(attr_name_)) {
// user do not set this attr
PADDLE_ENFORCE(!default_value_setter_.empty(),
"Attribute '%s' is required!", attr_name_);
// default_value_setter_ has no more than one element
T val;
(default_value_setter_[0])(val);
attr_map[attr_name_] = val;
(default_value_setter_[0])(&val);
(*attr_map)[attr_name_] = val;
}
Attribute& attr = attr_map.at(attr_name_);
Attribute& attr = attr_map->at(attr_name_);
ExtractAttribute<T> extract_attr(attr_name_);
T* attr_value = extract_attr(attr);
for (const auto& checker : value_checkers_) {
@ -289,12 +290,12 @@ class TypedAttrChecker {
private:
std::string attr_name_;
std::vector<ValueChecker> value_checkers_;
std::vector<ValueChecker> default_value_setter_;
std::vector<DefaultValueChecker> default_value_setter_;
};
// check whether op's all attributes fit their own limits
class OpAttrChecker {
typedef std::function<void(AttributeMap&)> AttrChecker;
typedef std::function<void(AttributeMap*)> AttrChecker;
public:
template <typename T>
@ -304,7 +305,7 @@ class OpAttrChecker {
return *(checker.target<TypedAttrChecker<T>>());
}
void Check(AttributeMap& attr_map) const { // NOLINT
void Check(AttributeMap* attr_map) const {
for (const auto& checker : attr_checkers_) {
checker(attr_map);
}

@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save