diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a6b0a20e4..c7d743e193 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,8 @@ include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) +option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF) +option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) @@ -74,6 +76,10 @@ if(ANDROID) "Disable PYTHON when cross-compiling for Android" FORCE) set(WITH_RDMA OFF CACHE STRING "Disable RDMA when cross-compiling for Android" FORCE) + set(WITH_MKLDNN OFF CACHE STRING + "Disable MKLDNN when cross-compiling for Android" FORCE) + set(WITH_MKLML OFF CACHE STRING + "Disable MKLML package when cross-compiling for Android" FORCE) endif(ANDROID) set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING @@ -87,6 +93,7 @@ endif() ######################################################################################## +include(external/mklml) # download mklml package include(external/zlib) # download, build, install zlib include(external/gflags) # download, build, install gflags include(external/glog) # download, build, install glog @@ -94,6 +101,7 @@ include(external/gtest) # download, build, install gtest include(external/protobuf) # download, build, install protobuf include(external/python) # download, build, install python include(external/openblas) # download, build, install openblas +include(external/mkldnn) # download, build, install mkldnn include(external/swig) # download, build, install swig include(external/warpctc) # download, build, install warpctc include(external/any) # download libn::any @@ -135,6 +143,10 @@ if(WITH_GPU) endif(NOT WITH_DSO) endif(WITH_GPU) +if(WITH_MKLDNN) + list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB}) +endif() + if(USE_NNPACK) include(external/nnpack) list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS}) diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 913f711aff..854066fd1d 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -15,23 +15,44 @@ set(CBLAS_FOUND OFF) -## Find MKL First. -set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs") -set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL") +## Find MKLML First. +if(WITH_MKLML AND MKLML_INC_DIR AND MKLML_LIB) + set(CBLAS_FOUND ON) + set(CBLAS_PROVIDER MKLML) + set(CBLAS_INC_DIR ${MKLML_INC_DIR}) + set(CBLAS_LIBRARIES ${MKLML_LIB}) + + add_definitions(-DPADDLE_USE_MKLML) + add_definitions(-DLAPACK_FOUND) + + message(STATUS "Found cblas and lapack in MKLML " + "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + return() +endif() + +## Then find MKL. +set(INTEL_MKL_ROOT "/opt/intel/mkl" CACHE PATH "Folder contains intel mkl libs") +set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains env MKL") + +set(MKL_INCLUDE_SEARCH_PATHS + ${MKL_ROOT}/include + ${INTEL_MKL_ROOT}/include) +set(MKL_LIB_SEARCH_PATHS + ${MKL_ROOT}/lib + ${MKL_ROOT}/lib/intel64 + ${INTEL_MKL_ROOT}/lib + ${INTEL_MKL_ROOT}/lib/intel64) find_path(MKL_INC_DIR mkl.h PATHS - ${MKL_ROOT}/include) + ${MKL_INCLUDE_SEARCH_PATHS}) find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS - ${MKL_ROOT}/include) + ${MKL_INCLUDE_SEARCH_PATHS}) find_library(MKL_CORE_LIB NAMES mkl_core PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) if(MKL_LAPACK_INC_DIR AND MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) set(CBLAS_FOUND ON) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 7afab5d534..69220e03fe 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -67,6 +67,30 @@ else() include_directories(${CUDA_TOOLKIT_INCLUDE}) endif(NOT WITH_GPU) +if(WITH_MKLDNN) + add_definitions(-DPADDLE_USE_MKLDNN) + if (WITH_MKLML AND MKLDNN_IOMP_DIR) + message(STATUS "Enable Intel OpenMP at ${MKLDNN_IOMP_DIR}") + set(OPENMP_FLAGS "-fopenmp") + set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}") + else() + find_package(OpenMP) + if(OPENMP_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + else() + message(WARNING "Can not find OpenMP." + "Some performance features in MKLDNN may not be available") + endif() + endif() + +endif(WITH_MKLDNN) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}") diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake index 77e06e983e..e3970073a1 100644 --- a/cmake/external/gtest.cmake +++ b/cmake/external/gtest.cmake @@ -34,9 +34,15 @@ IF(WITH_TESTING) "${GTEST_INSTALL_DIR}/lib/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE) ENDIF(WIN32) + IF(WITH_MKLML) + # wait for mklml downloading completed + SET(GTEST_DEPENDS ${MKLML_PROJECT}) + ENDIF() + ExternalProject_Add( extern_gtest ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS ${GTEST_DEPENDS} GIT_REPOSITORY "https://github.com/google/googletest.git" GIT_TAG "release-1.8.0" PREFIX ${GTEST_SOURCES_DIR} diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake new file mode 100644 index 0000000000..eff15de73f --- /dev/null +++ b/cmake/external/mkldnn.cmake @@ -0,0 +1,72 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(NOT ${WITH_MKLDNN}) + return() +ENDIF(NOT ${WITH_MKLDNN}) + +INCLUDE(ExternalProject) + +SET(MKLDNN_PROJECT "extern_mkldnn") +SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn) +SET(MKLDNN_INSTALL_ROOT ${CMAKE_INSTALL_PREFIX}) +IF(NOT "$ENV{HOME}" STREQUAL "/root") + SET(MKLDNN_INSTALL_ROOT "$ENV{HOME}") +ENDIF() + +SET(MKLDNN_INSTALL_DIR "${MKLDNN_INSTALL_ROOT}/opt/paddle/third_party/mkldnn") +SET(MKLDNN_INCLUDE_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE) + +IF(WIN32) + MESSAGE(WARNING "It is not supported compiling with mkldnn in windows Paddle yet." + "Force WITH_MKLDNN=OFF") + SET(WITH_MKLDNN OFF) + return() +ELSE(WIN32) + SET(MKLDNN_LIBRARY "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE) + MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path") + SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + #SET(CMAKE_MACOSX_RPATH 1) # hold for MacOS + SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib") +ENDIF(WIN32) + +INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIR}) + +IF(${CBLAS_PROVIDER} STREQUAL "MKLML") + SET(MKLDNN_DEPENDS ${MKLML_PROJECT}) + SET(MKLDNN_MKLROOT ${MKLML_ROOT}) + SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB}) + SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR}) +ENDIF() + +ExternalProject_Add( + ${MKLDNN_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS ${MKLDNN_DEPENDS} + GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" + GIT_TAG "v0.9" + PREFIX ${MKLDNN_SOURCES_DIR} + CONFIGURE_COMMAND mkdir -p /build + BUILD_COMMAND cd /build + && cmake .. -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} -DMKLROOT=${MKLDNN_MKLROOT} + && $(MAKE) + INSTALL_COMMAND cd /build && $(MAKE) install + UPDATE_COMMAND "" +) + +ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIBRARY}) +ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) +MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIBRARY}") +LIST(APPEND external_project_dependencies mkldnn) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake new file mode 100644 index 0000000000..3f940756a4 --- /dev/null +++ b/cmake/external/mklml.cmake @@ -0,0 +1,64 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(NOT ${WITH_MKLML}) + return() +ENDIF(NOT ${WITH_MKLML}) + +INCLUDE(ExternalProject) + +SET(MKLML_PROJECT "extern_mklml") +SET(MKLML_VER "mklml_lnx_2018.0.20170425") +SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.9/${MKLML_VER}.tgz") +SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") +SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") +SET(MKLML_DST_DIR "opt/paddle/third_party/mklml") +SET(MKLML_INSTALL_ROOT "${CMAKE_INSTALL_PREFIX}") +IF(NOT "$ENV{HOME}" STREQUAL "/root") + SET(MKLML_INSTALL_ROOT "$ENV{HOME}") +ENDIF() + +SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR}) +SET(MKLML_ROOT ${MKLML_INSTALL_DIR}/${MKLML_VER}) +SET(MKLML_INC_DIR ${MKLML_ROOT}/include) +SET(MKLML_LIB_DIR ${MKLML_ROOT}/lib) +SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) +SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib") + +INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) + +SET(mklml_cmakefile ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt) +FILE(WRITE ${mklml_cmakefile} "PROJECT(MKLML)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY ${MKLML_VER}\n" + " DESTINATION ${MKLML_DST_DIR})\n") + +ExternalProject_Add( + ${MKLML_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${MKLML_SOURCE_DIR} + DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate -O ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz ${MKLML_URL} + && tar -xzf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT} +) + +ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB}) +ADD_DEPENDENCIES(mklml ${MKLML_PROJECT}) +LIST(APPEND external_project_dependencies mklml) diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp index 7045562dd4..c8ba1074a1 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/math/MathFunctions.cpp @@ -202,7 +202,7 @@ double dotProduct(const int n, const double* x, const double* y) { return cblas_ddot(n, x, 1, y, 1); } -#ifdef PADDLE_USE_MKL +#if defined(PADDLE_USE_MKL) || defined(PADDLE_USE_MKLML) template <> void vExp(const int n, const float* a, float* r) { @@ -243,7 +243,55 @@ template <> void vAdd(const int n, const double* a, const double* b, double* r) { vdAdd(n, a, b, r); } +#else + +DEFINE_MATRIX_BINARY_OP(vExp, b = std::exp(a)); +template +void vExp(const int n, const T* a, T* r) { + hl_cpu_apply_binary_op, 0, 0>( + binary::vExp(), const_cast(a), r, 1, n, n, n); +} + +DEFINE_MATRIX_BINARY_OP(vLog, b = std::log(a)); +template +void vLog(const int n, const T* a, T* r) { + hl_cpu_apply_binary_op, 0, 0>( + binary::vLog(), const_cast(a), r, 1, n, n, n); +} + +DEFINE_MATRIX_BINARY_PARAMETER_OP(vPow, ONE_PARAMETER, b = std::pow(a, p)); +template +void vPow(const int n, const T* a, const T b, T* r) { + hl_cpu_apply_binary_op, 0, 0>( + binary::vPow(b), const_cast(a), r, 1, n, n, n); +} + +DEFINE_MATRIX_TERNARY_OP(vAdd, c = a + b); +template +void vAdd(const int n, const T* a, const T* b, T* r) { + hl_cpu_apply_ternary_op, 0, 0>(ternary::vAdd(), + const_cast(a), + const_cast(b), + r, + 1, + n, + n, + n, + n); +} + +template void vExp(const int n, const float* a, float* r); +template void vExp(const int n, const double* a, double* r); +template void vLog(const int n, const float* a, float* r); +template void vLog(const int n, const double* a, double* r); +template void vPow(const int n, const float* a, const float b, float* r); +template void vPow(const int n, const double* a, const double b, double* r); +template void vAdd(const int n, const float* a, const float* b, float* r); +template void vAdd(const int n, const double* a, const double* b, double* r); +#endif + +#ifdef PADDLE_USE_MKL template <> void vInvSqrt(const int n, const float* a, float* r) { vsInvSqrt(n, a, r); @@ -275,20 +323,6 @@ void vTanh(const int n, const double* a, double* r) { } #else -DEFINE_MATRIX_BINARY_OP(vExp, b = std::exp(a)); -template -void vExp(const int n, const T* a, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vExp(), const_cast(a), r, 1, n, n, n); -} - -DEFINE_MATRIX_BINARY_OP(vLog, b = std::log(a)); -template -void vLog(const int n, const T* a, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vLog(), const_cast(a), r, 1, n, n, n); -} - DEFINE_MATRIX_BINARY_OP(vInvSqrt, b = 1.0f / std::sqrt(a)); template void vInvSqrt(const int n, const T* a, T* r) { @@ -312,41 +346,12 @@ void vTanh(const int n, const T* a, T* r) { binary::vTanh(), const_cast(a), r, 1, n, n, n); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(vPow, ONE_PARAMETER, b = std::pow(a, p)); -template -void vPow(const int n, const T* a, const T b, T* r) { - hl_cpu_apply_binary_op, 0, 0>( - binary::vPow(b), const_cast(a), r, 1, n, n, n); -} - -DEFINE_MATRIX_TERNARY_OP(vAdd, c = a + b); -template -void vAdd(const int n, const T* a, const T* b, T* r) { - hl_cpu_apply_ternary_op, 0, 0>(ternary::vAdd(), - const_cast(a), - const_cast(b), - r, - 1, - n, - n, - n, - n); -} - -template void vExp(const int n, const float* a, float* r); -template void vExp(const int n, const double* a, double* r); -template void vLog(const int n, const float* a, float* r); -template void vLog(const int n, const double* a, double* r); template void vInvSqrt(const int n, const double* a, double* r); template void vInvSqrt(const int n, const float* a, float* r); template void vLog1p(const int n, const float* a, float* r); template void vLog1p(const int n, const double* a, double* r); template void vTanh(const int n, const float* a, float* r); template void vTanh(const int n, const double* a, double* r); -template void vPow(const int n, const float* a, const float b, float* r); -template void vPow(const int n, const double* a, const double b, double* r); -template void vAdd(const int n, const float* a, const float* b, float* r); -template void vAdd(const int n, const double* a, const double* b, double* r); #endif diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h index 8ada0d34c6..637643838f 100644 --- a/paddle/math/MathFunctions.h +++ b/paddle/math/MathFunctions.h @@ -15,6 +15,12 @@ limitations under the License. */ #ifndef MATHFUNCTIONS_H_ #define MATHFUNCTIONS_H_ +#ifdef PADDLE_USE_MKLML +#include +#include +#include +#endif + #ifdef PADDLE_USE_MKL #include #include