From 11cb75f8bbf10f55ed831d21ae3a4fecf97fe632 Mon Sep 17 00:00:00 2001 From: zlx Date: Mon, 28 Aug 2017 11:36:40 +0800 Subject: [PATCH 01/22] add the ios cross compile --- CMakeLists.txt | 17 ++- cmake/cross_compiling/ios.cmake | 207 ++++++++++++++++++++++++++++++++ cmake/external/python.cmake | 6 +- cmake/system.cmake | 3 + ios_run.sh | 17 +++ 5 files changed, 246 insertions(+), 4 deletions(-) create mode 100644 cmake/cross_compiling/ios.cmake create mode 100644 ios_run.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index dcd1218a5b..f0a01680aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ if(NOT CMAKE_CROSSCOMPILING) endif(NOT CMAKE_CROSSCOMPILING) find_package(Git REQUIRED) find_package(Threads REQUIRED) -if(NOT ANDROID) +if(NOT ANDROID AND NOT IOS) find_package(Boost QUIET) endif() @@ -63,6 +63,21 @@ if(NOT CMAKE_BUILD_TYPE) FORCE) endif() +if(IOS) + set(WITH_GPU OFF CACHE STRING + "Disable GPU when cross-compiling for Android" FORCE) + set(WITH_AVX OFF CACHE STRING + "Disable AVX when cross-compiling for Android" FORCE) + set(WITH_PYTHON OFF CACHE STRING + "Disable PYTHON when cross-compiling for Android" FORCE) + set(WITH_RDMA OFF CACHE STRING + "Disable RDMA when cross-compiling for Android" FORCE) + set(WITH_MKLDNN OFF CACHE STRING + "Disable MKLDNN when cross-compiling for Android" FORCE) + set(WITH_MKLML OFF CACHE STRING + "Disable MKLML package when cross-compiling for Android" FORCE) +endif(IOS) + if(ANDROID) if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21") message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 21") diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake new file mode 100644 index 0000000000..b179e29b25 --- /dev/null +++ b/cmake/cross_compiling/ios.cmake @@ -0,0 +1,207 @@ +# This file is based off of the Platform/Darwin.cmake and Platform/UnixPaths.cmake +# files which are included with CMake 2.8.4 +# It has been altered for iOS development + +# Options: +# +# IOS_PLATFORM = OS (default) or SIMULATOR +# This decides if SDKS will be selected from the iPhoneOS.platform or iPhoneSimulator.platform folders +# OS - the default, used to build for iPhone and iPad physical devices, which have an arm arch. +# SIMULATOR - used to build for the Simulator platforms, which have an x86 arch. +# +# CMAKE_IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder +# By default this location is automatcially chosen based on the IOS_PLATFORM value above. +# If set manually, it will override the default location and force the user of a particular Developer Platform +# +# CMAKE_IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder +# By default this location is automatcially chosen based on the CMAKE_IOS_DEVELOPER_ROOT value. +# In this case it will always be the most up-to-date SDK found in the CMAKE_IOS_DEVELOPER_ROOT path. +# If set manually, this will force the use of a specific SDK version + +# Macros: +# +# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE) +# A convenience macro for setting xcode specific properties on targets +# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1") +# +# find_host_package (PROGRAM ARGS) +# A macro used to find executable programs on the host system, not within the iOS environment. 
+# Thanks to the android-cmake project for providing the command + +# Standard settings +# set (CMAKE_SYSTEM_NAME Darwin) +set (CMAKE_SYSTEM_VERSION 1) +set (UNIX True) +set (APPLE True) +set (IOS True) + +# Required as of cmake 2.8.10 +set (CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE) + +# Determine the cmake host system version so we know where to find the iOS SDKs +find_program (CMAKE_UNAME uname /bin /usr/bin /usr/local/bin) +if (CMAKE_UNAME) + exec_program(uname ARGS -r OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION) + string (REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\1" DARWIN_MAJOR_VERSION "${CMAKE_HOST_SYSTEM_VERSION}") +endif (CMAKE_UNAME) + +# Force the compilers to gcc for iOS +set (CMAKE_C_COMPILER /usr/bin/gcc) +set (CMAKE_CXX_COMPILER /usr/bin/g++) +set(CMAKE_AR ar CACHE FILEPATH "" FORCE) +set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE) +set(PKG_CONFIG_EXECUTABLE pkg-config CACHE FILEPATH "" FORCE) + +# Setup iOS platform unless specified manually with IOS_PLATFORM +if (NOT DEFINED IOS_PLATFORM) + set (IOS_PLATFORM "OS") +endif (NOT DEFINED IOS_PLATFORM) +set (IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") + +# Check the platform selection and setup for developer root +if (${IOS_PLATFORM} STREQUAL "OS") + set (IOS_PLATFORM_LOCATION "iPhoneOS.platform") + set (XCODE_IOS_PLATFORM iphoneos) + + # This causes the installers to properly locate the output libraries + set (CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos") +elseif (${IOS_PLATFORM} STREQUAL "SIMULATOR") + set (SIMULATOR true) + set (IOS_PLATFORM_LOCATION "iPhoneSimulator.platform") + set (XCODE_IOS_PLATFORM iphonesimulator) + + # This causes the installers to properly locate the output libraries + set (CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator") +elseif (${IOS_PLATFORM} STREQUAL "WATCHOS") + set (IOS_PLATFORM_LOCATION "WatchOS.platform") + set (XCODE_IOS_PLATFORM watchos) + + # This causes the installers to properly locate the output libraries + set (CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos") +else (${IOS_PLATFORM} STREQUAL "OS") + message (FATAL_ERROR + "Unsupported IOS_PLATFORM value selected. 
" + "Please choose OS, SIMULATOR, or WATCHOS.") +endif () + +# All iOS/Darwin specific settings - some may be redundant +set (CMAKE_SHARED_LIBRARY_PREFIX "lib") +set (CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") +set (CMAKE_SHARED_MODULE_PREFIX "lib") +set (CMAKE_SHARED_MODULE_SUFFIX ".so") +set (CMAKE_MODULE_EXISTS 1) +set (CMAKE_DL_LIBS "") + +set (CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") +set (CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") +set (CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") +set (CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") + +if (IOS_DEPLOYMENT_TARGET) + set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}") +endif() + +# Hidden visibilty is required for cxx on iOS +set (CMAKE_C_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS}") +set (CMAKE_CXX_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -fvisibility-inlines-hidden") + +set (CMAKE_C_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") +set (CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") + +set (CMAKE_PLATFORM_HAS_INSTALLNAME 1) +set (CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") +set (CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") +set (CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") +set (CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") +set (CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a") + +# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build tree +# (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL isn't in the cache +# and still cmake didn't fail in CMakeFindBinUtils.cmake (because it isn't rerun) +# hardcode CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did before, Alex +if (NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool) +endif (NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + +# Setup iOS deployment target +set (IOS_DEPLOYMENT_TARGET ${IOS_DEPLOYMENT_TARGET} CACHE STRING "Minimum iOS version") + +# Setup iOS developer location unless specified manually with CMAKE_IOS_DEVELOPER_ROOT +# Note Xcode 4.3 changed the installation location, choose the most recent one available +exec_program(/usr/bin/xcode-select ARGS -print-path OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR) +set (XCODE_POST_43_ROOT "${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer") +set (XCODE_PRE_43_ROOT "/Developer/Platforms/${IOS_PLATFORM_LOCATION}/Developer") +if (NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT) + if (EXISTS ${XCODE_POST_43_ROOT}) + set (CMAKE_IOS_DEVELOPER_ROOT ${XCODE_POST_43_ROOT}) + elseif(EXISTS ${XCODE_PRE_43_ROOT}) + set (CMAKE_IOS_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT}) + endif (EXISTS ${XCODE_POST_43_ROOT}) +endif (NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT) +set (CMAKE_IOS_DEVELOPER_ROOT ${CMAKE_IOS_DEVELOPER_ROOT} CACHE PATH "Location of iOS Platform") + +# Find and use the most recent iOS sdk unless specified manually with CMAKE_IOS_SDK_ROOT +if (NOT DEFINED CMAKE_IOS_SDK_ROOT) + file (GLOB _CMAKE_IOS_SDKS "${CMAKE_IOS_DEVELOPER_ROOT}/SDKs/*") + if (_CMAKE_IOS_SDKS) + list (SORT _CMAKE_IOS_SDKS) + list (REVERSE _CMAKE_IOS_SDKS) + list (GET _CMAKE_IOS_SDKS 0 CMAKE_IOS_SDK_ROOT) + else (_CMAKE_IOS_SDKS) + message (FATAL_ERROR "No iOS SDK's found in default search path 
${CMAKE_IOS_DEVELOPER_ROOT}. Manually set CMAKE_IOS_SDK_ROOT or install the iOS SDK.") + endif (_CMAKE_IOS_SDKS) + message (STATUS "Toolchain using default iOS SDK: ${CMAKE_IOS_SDK_ROOT}") +endif (NOT DEFINED CMAKE_IOS_SDK_ROOT) +set (CMAKE_IOS_SDK_ROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK") + +# Set the sysroot default to the most recent SDK +set (CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS support") + +# set the architecture for iOS +if (IOS_PLATFORM STREQUAL "OS") + set (IOS_ARCH "armv7;armv7s;arm64") +elseif (IOS_PLATFORM STREQUAL "SIMULATOR") + set (IOS_ARCH "i386;x86_64") +elseif (IOS_PLATFORM STREQUAL "WATCHOS") + set (IOS_ARCH "armv7k") +endif () + +set (CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS") + +# Set the find root to the iOS developer roots and to user defined paths +set (CMAKE_FIND_ROOT_PATH ${CMAKE_IOS_DEVELOPER_ROOT} ${CMAKE_IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH} CACHE string "iOS find search path root") + +# default to searching for frameworks first +set (CMAKE_FIND_FRAMEWORK FIRST) + +# set up the default search directories for frameworks +set (CMAKE_SYSTEM_FRAMEWORK_PATH + ${CMAKE_IOS_SDK_ROOT}/System/Library/Frameworks + ${CMAKE_IOS_SDK_ROOT}/System/Library/PrivateFrameworks + ${CMAKE_IOS_SDK_ROOT}/Developer/Library/Frameworks +) + +# only search the iOS sdks, not the remainder of the host filesystem +set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + +# This little macro lets you set any XCode specific property +macro (set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE) + set_property (TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE}) +endmacro (set_xcode_property) + +# This macro lets you find executable programs on the host system +macro (find_host_package) + set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) + set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) + set (IOS FALSE) + + find_package(${ARGN}) + + set (IOS TRUE) + set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) + set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +endmacro (find_host_package) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index 490c87d67e..53ef7cd29c 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -15,9 +15,9 @@ INCLUDE(ExternalProject) INCLUDE(python_module) -FIND_PACKAGE(PythonInterp 2.7) +FIND_HOST_PACKAGE(PythonInterp 2.7) IF(WITH_PYTHON) - FIND_PACKAGE(PythonLibs 2.7) + FIND_HOST_PACKAGE(PythonLibs 2.7) # Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE. ADD_LIBRARY(python SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES}) @@ -29,7 +29,7 @@ IF(PYTHONINTERP_FOUND) find_python_module(numpy REQUIRED) find_python_module(wheel REQUIRED) find_python_module(google.protobuf REQUIRED) - FIND_PACKAGE(NumPy REQUIRED) + FIND_HOST_PACKAGE(NumPy REQUIRED) IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, " "please use pip to upgrade protobuf. 
pip install -U protobuf") diff --git a/cmake/system.cmake b/cmake/system.cmake index adf5e2c539..7462802d21 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -82,6 +82,9 @@ IF(DEFINED CMAKE_SYSTEM_NAME) ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "RPi") SET(RPI TRUE) INCLUDE(cross_compiling/raspberry_pi) + ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") + SET(IOS TRUE) + INCLUDE(cross_compiling/ios) ENDIF() ENDIF() diff --git a/ios_run.sh b/ios_run.sh new file mode 100644 index 0000000000..b8325f15fc --- /dev/null +++ b/ios_run.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -xe + +mkdir -p ./ios_build +cd ./ios_build + +cmake -DCMAKE_SYSTEM_NAME=Darwin \ + -DWITH_C_API=ON \ + -DWITH_TESTING=OFF \ + -DWITH_SWIG_PY=OFF \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=/Users/xingzhaolong/cross_compile/ios \ + .. + # -DIOS_PLATFORM=SIMULATOR \ + #-DCMAKE_Go_COMPILER=/usr/local/bin \ + From 43dde9353bf642dba113030327473554744ced0a Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Tue, 29 Aug 2017 19:49:41 +0800 Subject: [PATCH 02/22] Refine the cross-compiling toolchain file for iOS. --- cmake/cross_compiling/ios.cmake | 422 ++++++++++++++++++++------------ cmake/flags.cmake | 6 +- cmake/system.cmake | 13 +- 3 files changed, 274 insertions(+), 167 deletions(-) diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index b179e29b25..135104e69e 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -1,207 +1,311 @@ -# This file is based off of the Platform/Darwin.cmake and Platform/UnixPaths.cmake -# files which are included with CMake 2.8.4 -# It has been altered for iOS development - -# Options: +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 # +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is a toolchain file for cross-compiling for iOS, and the +# configuration largely refers to public toolchain file: +# https://raw.githubusercontent.com/leetal/ios-cmake/master/ios.toolchain.cmake +# and +# https://github.com/cristeab/ios-cmake +# +# Supports options: # IOS_PLATFORM = OS (default) or SIMULATOR # This decides if SDKS will be selected from the iPhoneOS.platform or iPhoneSimulator.platform folders # OS - the default, used to build for iPhone and iPad physical devices, which have an arm arch. # SIMULATOR - used to build for the Simulator platforms, which have an x86 arch. -# -# CMAKE_IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder +# IOS_ARCH +# The archectures wanted to support, such "arm64", "armv7;arm64" +# IOS_DEPLOYMENT_TARGET +# The minimum iOS deployment version, such as "7.0" +# IOS_ENABLE_BITCODE = ON (default) or OFF +# IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder # By default this location is automatcially chosen based on the IOS_PLATFORM value above. 
# If set manually, it will override the default location and force the user of a particular Developer Platform -# -# CMAKE_IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder -# By default this location is automatcially chosen based on the CMAKE_IOS_DEVELOPER_ROOT value. -# In this case it will always be the most up-to-date SDK found in the CMAKE_IOS_DEVELOPER_ROOT path. +# IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder +# By default this location is automatcially chosen based on the IOS_DEVELOPER_ROOT value. +# In this case it will always be the most up-to-date SDK found in the IOS_DEVELOPER_ROOT path. # If set manually, this will force the use of a specific SDK version # Macros: -# # set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE) # A convenience macro for setting xcode specific properties on targets # example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1") -# # find_host_package (PROGRAM ARGS) # A macro used to find executable programs on the host system, not within the iOS environment. # Thanks to the android-cmake project for providing the command -# Standard settings -# set (CMAKE_SYSTEM_NAME Darwin) -set (CMAKE_SYSTEM_VERSION 1) -set (UNIX True) -set (APPLE True) -set (IOS True) +if(NOT IOS) + return() +endif() + +set(CMAKE_SYSTEM_NAME Darwin) +#set(UNIX ON) +#set(APPLE ON) + +# Get the Xcode version being used. +execute_process(COMMAND xcodebuild -version + OUTPUT_VARIABLE XCODE_VERSION + RESULT_VARIABLE XCODE_VERSION_RESULT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT ${XCODE_VERSION_RESULT}) + string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION "${XCODE_VERSION}") + string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION "${XCODE_VERSION}") + message(STATUS "Building with Xcode version: ${XCODE_VERSION}") +else() + message(FATAL_ERROR "Cannot execute xcodebuild, please check whether xcode is installed.") +endif() # Required as of cmake 2.8.10 -set (CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE) - -# Determine the cmake host system version so we know where to find the iOS SDKs -find_program (CMAKE_UNAME uname /bin /usr/bin /usr/local/bin) -if (CMAKE_UNAME) - exec_program(uname ARGS -r OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION) - string (REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\1" DARWIN_MAJOR_VERSION "${CMAKE_HOST_SYSTEM_VERSION}") -endif (CMAKE_UNAME) - -# Force the compilers to gcc for iOS -set (CMAKE_C_COMPILER /usr/bin/gcc) -set (CMAKE_CXX_COMPILER /usr/bin/g++) +set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE) + set(CMAKE_AR ar CACHE FILEPATH "" FORCE) set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE) set(PKG_CONFIG_EXECUTABLE pkg-config CACHE FILEPATH "" FORCE) # Setup iOS platform unless specified manually with IOS_PLATFORM -if (NOT DEFINED IOS_PLATFORM) - set (IOS_PLATFORM "OS") -endif (NOT DEFINED IOS_PLATFORM) -set (IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") +if(NOT DEFINED IOS_PLATFORM) + set(IOS_PLATFORM "OS") +endif() +set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") + +# Set the architecture for iOS +if(NOT DEFINED IOS_ARCH) + if(IOS_PLATFORM STREQUAL "OS") + set(IOS_ARCH "armv7;armv7s;arm64") + elseif(IOS_PLATFORM STREQUAL "SIMULATOR") + set(IOS_ARCH "i386;x86_64") + elseif(IOS_PLATFORM STREQUAL "WATCHOS") + set(IOS_ARCH armv7k) + endif() +endif() +set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture 
for iOS") + +# Specify minimum iOS deployment version +if(NOT DEFINED IOS_DEPLOYMENT_TARGET) + set(IOS_DEPLOYMENT_TARGET "7.0") +endif() +set(IOS_DEPLOYMENT_TARGET ${IOS_DEPLOYMENT_TARGET} CACHE STRING "Minimum iOS version") + +# Whether to enable bitcode +if(NOT DEFINED IOS_ENABLE_BITCODE) + set(IOS_ENABLE_BITCODE ON) +endif() +set(IOS_ENABLE_BITCODE ${IOS_ENABLE_BITCODE} CACHE BOOL "Whether to enable bitcode") # Check the platform selection and setup for developer root -if (${IOS_PLATFORM} STREQUAL "OS") - set (IOS_PLATFORM_LOCATION "iPhoneOS.platform") - set (XCODE_IOS_PLATFORM iphoneos) - - # This causes the installers to properly locate the output libraries - set (CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos") -elseif (${IOS_PLATFORM} STREQUAL "SIMULATOR") - set (SIMULATOR true) - set (IOS_PLATFORM_LOCATION "iPhoneSimulator.platform") - set (XCODE_IOS_PLATFORM iphonesimulator) - - # This causes the installers to properly locate the output libraries - set (CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator") -elseif (${IOS_PLATFORM} STREQUAL "WATCHOS") - set (IOS_PLATFORM_LOCATION "WatchOS.platform") - set (XCODE_IOS_PLATFORM watchos) - - # This causes the installers to properly locate the output libraries - set (CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos") -else (${IOS_PLATFORM} STREQUAL "OS") - message (FATAL_ERROR - "Unsupported IOS_PLATFORM value selected. " - "Please choose OS, SIMULATOR, or WATCHOS.") -endif () - -# All iOS/Darwin specific settings - some may be redundant -set (CMAKE_SHARED_LIBRARY_PREFIX "lib") -set (CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") -set (CMAKE_SHARED_MODULE_PREFIX "lib") -set (CMAKE_SHARED_MODULE_SUFFIX ".so") -set (CMAKE_MODULE_EXISTS 1) -set (CMAKE_DL_LIBS "") - -set (CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") -set (CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") -set (CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") -set (CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") - -if (IOS_DEPLOYMENT_TARGET) - set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}") +if(${IOS_PLATFORM} STREQUAL "OS") + set(IOS_PLATFORM_LOCATION "iPhoneOS.platform") + set(XCODE_IOS_PLATFORM iphoneos) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos") +elseif(${IOS_PLATFORM} STREQUAL "SIMULATOR") + set(IOS_PLATFORM_LOCATION "iPhoneSimulator.platform") + set(XCODE_IOS_PLATFORM iphonesimulator) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator") +elseif(${IOS_PLATFORM} STREQUAL "WATCHOS") + set(IOS_PLATFORM_LOCATION "WatchOS.platform") + set(XCODE_IOS_PLATFORM watchos) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos") +else(${IOS_PLATFORM} STREQUAL "OS") + message(FATAL_ERROR "Unsupported IOS_PLATFORM value selected. 
Please set to\n" + "\t OS, SIMULATOR, or WATCHOS.") +endif() + +# Check iOS developer toolchain +if(NOT DEFINED IOS_DEVELOPER_ROOT) + # Setup iOS developer location + execute_process(COMMAND xcode-select -print-path + OUTPUT_VARIABLE XCODE_DEVELOPER_DIR + RESULT_VARIABLE XCODE_DEVELOPER_DIR_RESULT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Xcode 4.3 changed the installation location, choose the most recent one available + if(${XCODE_VERSION} VERSION_LESS "4.3.0") + set(IOS_DEVELOPER_ROOT "/Developer/Platforms/${IOS_PLATFORM_LOCATION}/Developer") + else() + set(IOS_DEVELOPER_ROOT "${XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer") + endif() +endif() +if(EXISTS ${IOS_DEVELOPER_ROOT}) + set(IOS_DEVELOPER_ROOT ${IOS_DEVELOPER_ROOT} CACHE PATH "Location of iOS Platform") +else() + message(FATAL_ERROR "Invalid IOS_DEVELOPER_ROOT: ${IOS_DEVELOPER_ROOT} does not exist.") endif() +# Check iOS SDK +if(NOT DEFINED IOS_SDK_ROOT) + # Find and use the most recent iOS sdk + file(GLOB IOS_SDK_LISTS "${IOS_DEVELOPER_ROOT}/SDKs/*") + if(IOS_SDK_LISTS) + list(SORT IOS_SDK_LISTS) + list(REVERSE IOS_SDK_LISTS) + list(GET IOS_SDK_LISTS 0 IOS_SDK_ROOT) + else(IOS_SDK_LISTS) + message(FATAL_ERROR "No iOS SDK's found in default search path ${IOS_DEVELOPER_ROOT}." + " Please manually set IOS_SDK_ROOT or install the iOS SDK.") + endif(IOS_SDK_LISTS) +endif() +if(EXISTS ${IOS_SDK_ROOT}) + set(CMAKE_IOS_SDK_ROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK") + message(STATUS "iOS toolchain: ${IOS_SDK_ROOT}") +else() + message(FATAL_ERROR "Invalid IOS_SDK_ROOT: ${IOS_SDK_ROOT} does not exist.") +endif() + +# Set the sysroot default to the most recent SDK +set(CMAKE_OSX_SYSROOT ${IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS support") + +# Get version of iOS SDK +execute_process(COMMAND xcodebuild -sdk ${CMAKE_OSX_SYSROOT} -version SDKVersion + OUTPUT_VARIABLE IOS_SDK_VERSION + RESULT_VARIABLE IOS_SDK_VERSION_RESULT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) +if(${IOS_SDK_VERSION_RESULT}) + string(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" IOS_SDK_VERSION "${IOS_SDK_ROOT}") +endif() +if(NOT IOS_SDK_VERSION) + message(WARNING "Cannot get SDK's version.") + set(IOS_SDK_VERSION 1) +endif() +set(CMAKE_SYSTEM_VERSION ${IOS_SDK_VERSION}) + +# Find the C & C++ compilers for the specified SDK. 
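+# xcrun locates the clang/clang++ bundled with the selected SDK; a
+# user-supplied CMAKE_C_COMPILER/CMAKE_CXX_COMPILER still takes precedence.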
+if(NOT CMAKE_C_COMPILER) + # Default to use clang + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang + OUTPUT_VARIABLE IOS_C_COMPILER + RESULT_VARIABLE IOS_C_COMPILER_RESULT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(${IOS_C_COMPILER_RESULT}) + get_filename_component(IOS_C_COMPILER clang PROGRAM) + endif() +else(NOT CMAKE_C_COMPILER) + # User can set it in cmake command + get_filename_component(IOS_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM) +endif(NOT CMAKE_C_COMPILER) +if(NOT EXISTS ${IOS_C_COMPILER}) + message(FATAL_ERROR "Cannot find C compiler: ${IOS_C_COMPILER}") +endif() + +if(NOT CMAKE_CXX_COMPILER) + # Default to use clang++ + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang++ + OUTPUT_VARIABLE IOS_CXX_COMPILER + RESULT_VARIABLE IOS_CXX_COMPILER_RESULT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(${IOS_CXX_COMPILER_RESULT}) + get_filename_component(IOS_CXX_COMPILER clang++ PROGRAM) + endif() +else(NOT CMAKE_CXX_COMPILER) + # User can set it in cmake command + get_filename_component(IOS_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM) +endif(NOT CMAKE_CXX_COMPILER) +if(NOT EXISTS ${IOS_CXX_COMPILER}) + message(FATAL_ERROR "Cannot find CXX compiler: ${IOS_CXX_COMPILER}") +endif() + +set(CMAKE_C_COMPILER ${IOS_C_COMPILER} CACHE PATH "C compiler" FORCE) +set(CMAKE_CXX_COMPILER ${IOS_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE) + +set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") +set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") +set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") +set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") + +# Set iOS specific C/C++ flags +if(IOS_PLATFORM STREQUAL "OS") + if(XCODE_VERSION VERSION_LESS "7.0") + set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-mios-version-min=${IOS_DEPLOYMENT_TARGET}") + else() + # Xcode 7.0+ uses flags we can build directly from XCODE_IOS_PLATFORM. 
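+      # e.g. "-miphoneos-version-min=7.0" when targeting physical devices.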
+ set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}") + endif() +else() + set(XCODE_IOS_FLATFORM_VERSION_FLAGS "-mios-simulator-version-min=${IOS_DEPLOYMENT_TARGET}") +endif() + +if(IOS_ENABLE_BITCODE) + set(XCODE_IOS_BITCODE_FLAGS "${IOS_COMPILER_FLAGS} -fembed-bitcode") +else() + set(XCODE_IOS_BITCODE_FLAGS "") +endif() + +set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_FLAGS}") + # Hidden visibilty is required for cxx on iOS -set (CMAKE_C_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS}") -set (CMAKE_CXX_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -fvisibility-inlines-hidden") +set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags") +set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") -set (CMAKE_C_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") -set (CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") +set(CMAKE_C_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") +set(CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") -set (CMAKE_PLATFORM_HAS_INSTALLNAME 1) -set (CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") -set (CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") -set (CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") -set (CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") -set (CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a") +set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) +set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") +set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a") # hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build tree # (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL isn't in the cache # and still cmake didn't fail in CMakeFindBinUtils.cmake (because it isn't rerun) # hardcode CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did before, Alex -if (NOT DEFINED CMAKE_INSTALL_NAME_TOOL) - find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool) -endif (NOT DEFINED CMAKE_INSTALL_NAME_TOOL) - -# Setup iOS deployment target -set (IOS_DEPLOYMENT_TARGET ${IOS_DEPLOYMENT_TARGET} CACHE STRING "Minimum iOS version") - -# Setup iOS developer location unless specified manually with CMAKE_IOS_DEVELOPER_ROOT -# Note Xcode 4.3 changed the installation location, choose the most recent one available -exec_program(/usr/bin/xcode-select ARGS -print-path OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR) -set (XCODE_POST_43_ROOT "${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer") -set (XCODE_PRE_43_ROOT "/Developer/Platforms/${IOS_PLATFORM_LOCATION}/Developer") -if (NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT) - if (EXISTS ${XCODE_POST_43_ROOT}) - set (CMAKE_IOS_DEVELOPER_ROOT ${XCODE_POST_43_ROOT}) - elseif(EXISTS ${XCODE_PRE_43_ROOT}) - set (CMAKE_IOS_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT}) - endif (EXISTS ${XCODE_POST_43_ROOT}) -endif (NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT) -set (CMAKE_IOS_DEVELOPER_ROOT ${CMAKE_IOS_DEVELOPER_ROOT} 
CACHE PATH "Location of iOS Platform") - -# Find and use the most recent iOS sdk unless specified manually with CMAKE_IOS_SDK_ROOT -if (NOT DEFINED CMAKE_IOS_SDK_ROOT) - file (GLOB _CMAKE_IOS_SDKS "${CMAKE_IOS_DEVELOPER_ROOT}/SDKs/*") - if (_CMAKE_IOS_SDKS) - list (SORT _CMAKE_IOS_SDKS) - list (REVERSE _CMAKE_IOS_SDKS) - list (GET _CMAKE_IOS_SDKS 0 CMAKE_IOS_SDK_ROOT) - else (_CMAKE_IOS_SDKS) - message (FATAL_ERROR "No iOS SDK's found in default search path ${CMAKE_IOS_DEVELOPER_ROOT}. Manually set CMAKE_IOS_SDK_ROOT or install the iOS SDK.") - endif (_CMAKE_IOS_SDKS) - message (STATUS "Toolchain using default iOS SDK: ${CMAKE_IOS_SDK_ROOT}") -endif (NOT DEFINED CMAKE_IOS_SDK_ROOT) -set (CMAKE_IOS_SDK_ROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK") - -# Set the sysroot default to the most recent SDK -set (CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS support") - -# set the architecture for iOS -if (IOS_PLATFORM STREQUAL "OS") - set (IOS_ARCH "armv7;armv7s;arm64") -elseif (IOS_PLATFORM STREQUAL "SIMULATOR") - set (IOS_ARCH "i386;x86_64") -elseif (IOS_PLATFORM STREQUAL "WATCHOS") - set (IOS_ARCH "armv7k") -endif () - -set (CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS") +if(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool) +endif() # Set the find root to the iOS developer roots and to user defined paths -set (CMAKE_FIND_ROOT_PATH ${CMAKE_IOS_DEVELOPER_ROOT} ${CMAKE_IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH} CACHE string "iOS find search path root") +set(CMAKE_FIND_ROOT_PATH ${IOS_DEVELOPER_ROOT} ${IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH} + CACHE string "iOS find search path root") # default to searching for frameworks first -set (CMAKE_FIND_FRAMEWORK FIRST) +set(CMAKE_FIND_FRAMEWORK FIRST) # set up the default search directories for frameworks -set (CMAKE_SYSTEM_FRAMEWORK_PATH - ${CMAKE_IOS_SDK_ROOT}/System/Library/Frameworks - ${CMAKE_IOS_SDK_ROOT}/System/Library/PrivateFrameworks - ${CMAKE_IOS_SDK_ROOT}/Developer/Library/Frameworks -) +set(CMAKE_SYSTEM_FRAMEWORK_PATH + ${IOS_SDK_ROOT}/System/Library/Frameworks + ${IOS_SDK_ROOT}/System/Library/PrivateFrameworks + ${IOS_SDK_ROOT}/Developer/Library/Frameworks + ) # only search the iOS sdks, not the remainder of the host filesystem -set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) -set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) -set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + +message(STATUS "iOS: Targeting iOS '${CMAKE_SYSTEM_VERSION}', " + "building for '${IOS_PLATFORM}' platform, with architecture '${CMAKE_OSX_ARCHITECTURES}'") # This little macro lets you set any XCode specific property -macro (set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE) - set_property (TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE}) -endmacro (set_xcode_property) +macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE) + set_property (TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE}) +endmacro(set_xcode_property) # This macro lets you find executable programs on the host system -macro (find_host_package) - set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) - set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) - set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) - set (IOS FALSE) - - find_package(${ARGN}) - - set (IOS TRUE) - set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) - set 
(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) - set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) -endmacro (find_host_package) +macro(find_host_package) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) + set(IOS FALSE) + + find_package(${ARGN}) + + set(IOS TRUE) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +endmacro(find_host_package) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index b27eb71550..11c96351e6 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -135,8 +135,10 @@ set(GPU_COMMON_FLAGS ) if (APPLE) - # On Mac OS X build fat binaries with x86_64 architectures by default. - set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE) + if(NOT CMAKE_CROSSCOMPILING) + # On Mac OS X build fat binaries with x86_64 architectures by default. + set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE) + endif() else() set(GPU_COMMON_FLAGS -Wall diff --git a/cmake/system.cmake b/cmake/system.cmake index 7462802d21..396bd1a079 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -24,11 +24,10 @@ IF(WIN32) SET(HOST_SYSTEM "win32") ELSE(WIN32) IF(APPLE) - EXEC_PROGRAM (sw_vers ARGS -productVersion OUTPUT_VARIABLE MACOSX_VERSION) - STRING(REGEX MATCH "[0-9]+.[0-9]+" VERSION "${MACOSX_VERSION}") - SET(MACOS_VERSION ${VERSION}) SET(HOST_SYSTEM "macosx") - IF(NOT DEFINED ENV{MACOSX_DEPLOYMENT_TARGET}) + EXEC_PROGRAM(sw_vers ARGS -productVersion OUTPUT_VARIABLE HOST_SYSTEM_VERSION) + STRING(REGEX MATCH "[0-9]+.[0-9]+" MACOS_VERSION "${HOST_SYSTEM_VERSION}") + IF(NOT DEFINED $ENV{MACOSX_DEPLOYMENT_TARGET}) # Set cache variable - end user may change this during ccmake or cmake-gui configure. SET(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOS_VERSION} CACHE STRING "Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value.") @@ -49,6 +48,8 @@ ELSE(WIN32) ELSEIF(LINUX_ISSUE MATCHES "Fedora") SET(HOST_SYSTEM "fedora") ENDIF() + + STRING(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" HOST_SYSTEM_VERSION "${LINUX_ISSUE}") ENDIF(EXISTS "/etc/issue") IF(EXISTS "/etc/redhat-release") @@ -70,7 +71,7 @@ CMAKE_HOST_SYSTEM_INFORMATION(RESULT CPU_CORES QUERY NUMBER_OF_LOGICAL_CORES) MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES) -MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}") +MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}, version: ${HOST_SYSTEM_VERSION}") MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores") # configuration for cross-compiling @@ -82,7 +83,7 @@ IF(DEFINED CMAKE_SYSTEM_NAME) ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "RPi") SET(RPI TRUE) INCLUDE(cross_compiling/raspberry_pi) - ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") + ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") SET(IOS TRUE) INCLUDE(cross_compiling/ios) ENDIF() From fb38e6620e77ceeb218ef65b05ef2d73ba158796 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Tue, 29 Aug 2017 19:56:18 +0800 Subject: [PATCH 03/22] Seperate the codes that cannot and don't need to build for iOS devices. 
--- CMakeLists.txt | 11 ++++-- cmake/cross_compiling/ios.cmake | 2 ++ paddle/CMakeLists.txt | 8 +++-- paddle/capi/CMakeLists.txt | 56 +++++++++++++++---------------- paddle/utils/Excepts.h | 3 +- paddle/utils/arch/linux/Locks.cpp | 6 ++++ paddle/utils/arch/osx/Excepts.cpp | 3 +- 7 files changed, 53 insertions(+), 36 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f0a01680aa..e3dec9b215 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -167,11 +167,16 @@ if(USE_NNPACK) list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS}) endif(USE_NNPACK) +message(STATUS "CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}") +message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") + add_subdirectory(proto) -# "add_subdirectory(go)" should be placed after the following loine, -# because it depends on paddle/optimizer. -add_subdirectory(paddle/optimizer) +if(NOT ANDROID AND NOT IOS) + # "add_subdirectory(go)" should be placed after the following loine, + # because it depends on paddle/optimizer. + add_subdirectory(paddle/optimizer) +endif() # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be # placed after this block, because they depends on it. diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index 135104e69e..dbdf29e1da 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -289,6 +289,8 @@ set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) message(STATUS "iOS: Targeting iOS '${CMAKE_SYSTEM_VERSION}', " "building for '${IOS_PLATFORM}' platform, with architecture '${CMAKE_OSX_ARCHITECTURES}'") +message(STATUS "System CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}") +message(STATUS "System CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") # This little macro lets you set any XCode specific property macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE) diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index cf61a243e9..b5ad888f08 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -5,8 +5,12 @@ add_subdirectory(testing) add_subdirectory(math) add_subdirectory(parameter) add_subdirectory(gserver) -add_subdirectory(pserver) -add_subdirectory(trainer) + +if(NOT ANDROID AND NOT IOS) + add_subdirectory(pserver) + add_subdirectory(trainer) +endif() + add_subdirectory(scripts) add_subdirectory(string) diff --git a/paddle/capi/CMakeLists.txt b/paddle/capi/CMakeLists.txt index 11022d1754..5787b6357b 100644 --- a/paddle/capi/CMakeLists.txt +++ b/paddle/capi/CMakeLists.txt @@ -31,42 +31,40 @@ add_dependencies(paddle_capi paddle_proto) # combine all paddle static libraries together, into libpaddle_capi_whole.a # user should use PaddleCAPI as -lpaddle_capi_whole -set(capi_whole_library libpaddle_capi_whole.a) -add_custom_target(paddle_capi_whole ALL - COMMAND mkdir -p o_files/capi && cd o_files/capi/ && ar -x $ - COMMAND mkdir -p o_files/utils && cd o_files/utils/ && ar -x $ - COMMAND mkdir -p o_files/parameter && cd o_files/parameter/ && ar -x $ - COMMAND mkdir -p o_files/math && cd o_files/math/ && ar -x $ - COMMAND mkdir -p o_files/cuda && cd o_files/cuda/ && ar -x $ - COMMAND mkdir -p o_files/function && cd o_files/function/ && ar -x $ - COMMAND mkdir -p o_files/gserver && cd o_files/gserver/ && ar -x $ - COMMAND mkdir -p o_files/proto && cd o_files/proto/ && ar -x $ - COMMAND mkdir -p o_files/network && cd o_files/network/ && ar -x $ - COMMAND mkdir -p o_files/pserver && cd o_files/pserver/ && ar -x $ - COMMAND ar crs ${capi_whole_library} `find ./o_files -name '*.o'` - COMMAND rm -rf o_files - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS 
paddle_capi paddle_utils paddle_parameter paddle_math - paddle_cuda paddle_function paddle_gserver - paddle_proto paddle_pserver paddle_network - ) -set_target_properties(paddle_capi_whole - PROPERTIES IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/${capi_whole_library}) +set(PADDLE_INFER_LIBS + paddle_utils + paddle_parameter + paddle_math + paddle_cuda + paddle_function + paddle_gserver + paddle_proto + ) -add_library(paddle_capi_shared SHARED ${CAPI_SOURCES}) -target_include_directories(paddle_capi_shared PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) -link_paddle_exe(paddle_capi_shared) +set(PADDLE_TRAIN_LIBS paddle_pserver paddle_network) + +cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_INFER_LIBS}) + +# No shared library for iOS +if(NOT IOS) + add_library(paddle_capi_shared SHARED ${CAPI_SOURCES}) + target_include_directories(paddle_capi_shared PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) + link_paddle_exe(paddle_capi_shared) +endif() # install library & headers. install(FILES ${CAPI_HEADERS} DESTINATION include/paddle) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h DESTINATION include/paddle) if(ANDROID) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${capi_whole_library} - DESTINATION lib/${ANDROID_ABI}) - install(TARGETS paddle_capi_shared DESTINATION lib/${ANDROID_ABI}) + install(TARGETS paddle_capi_whole paddle_capi_shared + ARCHIVE DESTINATION lib/${ANDROID_ABI} + LIBRARY DESTINATION lib/${ANDROID_ABI}) else(ANDROID) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${capi_whole_library} DESTINATION lib) - install(TARGETS paddle_capi_shared DESTINATION lib) + install(TARGETS paddle_capi_whole + ARCHIVE DESTINATION lib) + if(NOT IOS) + install(TARGETS paddle_capi_shared DESTINATION lib) + endif() endif(ANDROID) # this variable used for unittest diff --git a/paddle/utils/Excepts.h b/paddle/utils/Excepts.h index 5c2c504f53..0add66da74 100644 --- a/paddle/utils/Excepts.h +++ b/paddle/utils/Excepts.h @@ -17,7 +17,8 @@ limitations under the License. */ #include -#if defined(__APPLE__) || defined(__OSX__) +#if (defined(__APPLE__) || defined(__OSX__)) && !defined(__arm__) && \ + !defined(__aarch64__) int fegetexcept(void); int feenableexcept(unsigned int excepts); diff --git a/paddle/utils/arch/linux/Locks.cpp b/paddle/utils/arch/linux/Locks.cpp index 3a0903d1f2..a4e6c8f7b8 100644 --- a/paddle/utils/arch/linux/Locks.cpp +++ b/paddle/utils/arch/linux/Locks.cpp @@ -40,6 +40,8 @@ void Semaphore::wait() { sem_wait(&m->sem); } void Semaphore::post() { sem_post(&m->sem); } +/// SpinLockPrivate + #ifdef PADDLE_USE_PTHREAD_SPINLOCK class SpinLockPrivate { @@ -79,6 +81,8 @@ SpinLock::~SpinLock() { delete m; } void SpinLock::lock() { m->lock(); } void SpinLock::unlock() { m->unlock(); } +/// ThreadBarrierPrivate + #ifdef PADDLE_USE_PTHREAD_BARRIER class ThreadBarrierPrivate { @@ -136,6 +140,8 @@ public: #endif +/// ThreadBarrier + ThreadBarrier::ThreadBarrier(int count) : m(new ThreadBarrierPrivate(count)) {} ThreadBarrier::~ThreadBarrier() { delete m; } void ThreadBarrier::wait() { m->wait(); } diff --git a/paddle/utils/arch/osx/Excepts.cpp b/paddle/utils/arch/osx/Excepts.cpp index c8e904d8f9..42ecaa06d2 100644 --- a/paddle/utils/arch/osx/Excepts.cpp +++ b/paddle/utils/arch/osx/Excepts.cpp @@ -14,7 +14,8 @@ limitations under the License. 
*/ #include "paddle/utils/Excepts.h" -#if defined(__APPLE__) || defined(__OSX__) +#if (defined(__APPLE__) || defined(__OSX__)) && !defined(__arm__) && \ + !defined(__aarch64__) int fegetexcept(void) { static fenv_t fenv; From d2435ba81cc98244ba845142a798fd913c172c89 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Tue, 29 Aug 2017 20:08:00 +0800 Subject: [PATCH 04/22] Disable the finding of python, swig related packages completely when WITH_PYTHON and WITH_SWIG_PY are set to OFF. --- cmake/external/python.cmake | 27 ++++++++++++--------------- cmake/external/swig.cmake | 4 ++++ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index 53ef7cd29c..46c68cce32 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +IF(NOT WITH_PYTHON) + return() +ENDIF() + INCLUDE(python_module) -FIND_HOST_PACKAGE(PythonInterp 2.7) -IF(WITH_PYTHON) - FIND_HOST_PACKAGE(PythonLibs 2.7) - # Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE. - ADD_LIBRARY(python SHARED IMPORTED GLOBAL) - SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES}) -ENDIF(WITH_PYTHON) +FIND_PACKAGE(PythonInterp 2.7) +FIND_PACKAGE(PythonLibs 2.7) +# Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE. +ADD_LIBRARY(python SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES}) SET(py_env "") IF(PYTHONINTERP_FOUND) @@ -29,16 +30,12 @@ IF(PYTHONINTERP_FOUND) find_python_module(numpy REQUIRED) find_python_module(wheel REQUIRED) find_python_module(google.protobuf REQUIRED) - FIND_HOST_PACKAGE(NumPy REQUIRED) + FIND_PACKAGE(NumPy REQUIRED) IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, " "please use pip to upgrade protobuf. pip install -U protobuf") ENDIF() ENDIF(PYTHONINTERP_FOUND) -IF(WITH_PYTHON) - INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR}) - INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR}) -ELSE() - SET(PYTHON_LIBRARIES "") -ENDIF() +INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR}) +INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR}) diff --git a/cmake/external/swig.cmake b/cmake/external/swig.cmake index 744c766ee7..ce088ae7ea 100644 --- a/cmake/external/swig.cmake +++ b/cmake/external/swig.cmake @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +IF(NOT WITH_SWIG_PY) + return() +ENDIF() + FIND_PACKAGE(SWIG) IF(NOT SWIG_FOUND) From 224f8b06f41827c125aee1374b8259f878cc3c78 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Wed, 30 Aug 2017 15:12:30 +0800 Subject: [PATCH 05/22] Support building for multiple architecures at one time. 
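An illustrative way to configure such a build with this toolchain (a sketch
only; the architecture list, deployment target, and option values below are
examples, not mandated by this patch):

    cmake .. -DCMAKE_SYSTEM_NAME=iOS \
             -DIOS_PLATFORM=OS \
             -DIOS_ARCH="armv7;arm64" \
             -DIOS_DEPLOYMENT_TARGET=7.0 \
             -DWITH_C_API=ON \
             -DWITH_TESTING=OFF \
             -DWITH_SWIG_PY=OFF \
             -DCMAKE_BUILD_TYPE=Release
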
--- CMakeLists.txt | 34 ++++++++------------------------- cmake/cross_compiling/ios.cmake | 8 +++++++- cmake/external/openblas.cmake | 23 ++++++++++++++++------ cmake/external/zlib.cmake | 18 +++++++++-------- ios_run.sh | 17 ----------------- 5 files changed, 42 insertions(+), 58 deletions(-) delete mode 100644 ios_run.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index e3dec9b215..bca2b796e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,38 +63,23 @@ if(NOT CMAKE_BUILD_TYPE) FORCE) endif() -if(IOS) - set(WITH_GPU OFF CACHE STRING - "Disable GPU when cross-compiling for Android" FORCE) - set(WITH_AVX OFF CACHE STRING - "Disable AVX when cross-compiling for Android" FORCE) - set(WITH_PYTHON OFF CACHE STRING - "Disable PYTHON when cross-compiling for Android" FORCE) - set(WITH_RDMA OFF CACHE STRING - "Disable RDMA when cross-compiling for Android" FORCE) - set(WITH_MKLDNN OFF CACHE STRING - "Disable MKLDNN when cross-compiling for Android" FORCE) - set(WITH_MKLML OFF CACHE STRING - "Disable MKLML package when cross-compiling for Android" FORCE) -endif(IOS) - -if(ANDROID) - if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21") +if(ANDROID OR IOS) + if(ANDROID AND ${CMAKE_SYSTEM_VERSION} VERSION_LESS "21") message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 21") endif() set(WITH_GPU OFF CACHE STRING - "Disable GPU when cross-compiling for Android" FORCE) + "Disable GPU when cross-compiling for Android and iOS" FORCE) set(WITH_AVX OFF CACHE STRING - "Disable AVX when cross-compiling for Android" FORCE) + "Disable AVX when cross-compiling for Android and iOS" FORCE) set(WITH_PYTHON OFF CACHE STRING - "Disable PYTHON when cross-compiling for Android" FORCE) + "Disable PYTHON when cross-compiling for Android and iOS" FORCE) set(WITH_RDMA OFF CACHE STRING - "Disable RDMA when cross-compiling for Android" FORCE) + "Disable RDMA when cross-compiling for Android and iOS" FORCE) set(WITH_MKLDNN OFF CACHE STRING - "Disable MKLDNN when cross-compiling for Android" FORCE) + "Disable MKLDNN when cross-compiling for Android and iOS" FORCE) set(WITH_MKLML OFF CACHE STRING - "Disable MKLML package when cross-compiling for Android" FORCE) + "Disable MKLML package when cross-compiling for Android and iOS" FORCE) endif(ANDROID) set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING @@ -167,9 +152,6 @@ if(USE_NNPACK) list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS}) endif(USE_NNPACK) -message(STATUS "CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}") -message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") - add_subdirectory(proto) if(NOT ANDROID AND NOT IOS) diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index dbdf29e1da..b15dcec9bc 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -160,7 +160,7 @@ if(NOT DEFINED IOS_SDK_ROOT) endif(IOS_SDK_LISTS) endif() if(EXISTS ${IOS_SDK_ROOT}) - set(CMAKE_IOS_SDK_ROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK") + set(IOS_SDK_ROOT ${IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK") message(STATUS "iOS toolchain: ${IOS_SDK_ROOT}") else() message(FATAL_ERROR "Invalid IOS_SDK_ROOT: ${IOS_SDK_ROOT} does not exist.") @@ -292,6 +292,12 @@ message(STATUS "iOS: Targeting iOS '${CMAKE_SYSTEM_VERSION}', " message(STATUS "System CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}") message(STATUS "System CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") +# Used in ExternalProject command +string(REPLACE ";" "\\$" EXTERNAL_IOS_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}") 
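+# Escaping the list separator keeps a multi-architecture value such as
+# "armv7;arm64" intact when forwarded through ExternalProject_Add below.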
+set(EXTERNAL_OPTIONAL_ARGS + -DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT} + -DCMAKE_OSX_ARCHITECTURES=${EXTERNAL_IOS_ARCHITECTURES}) + # This little macro lets you set any XCode specific property macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE) set_property (TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE}) diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 0eeccbf7d8..025eb62a4a 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -25,23 +25,32 @@ IF(NOT ${CBLAS_FOUND}) "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE FILEPATH "openblas library." FORCE) - SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs) - + SET(OPENBLAS_CC "${CMAKE_C_COMPILER}") IF(CMAKE_CROSSCOMPILING) + SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER}) IF(ANDROID) # arm_soft_fp_abi branch of OpenBLAS to support softfp # https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") - SET(TARGET "ARMV7") + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0) ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a") - SET(TARGET "ARMV8") + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0) + ENDIF() + ELSEIF(IOS) + # FIXME: support multiple architectures + SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") + IF(CMAKE_OSX_ARCHITECTURES MATCHES "armv7") + SET(OPENBLAS_CC "${OPENBLAS_CC} -isysroot ${CMAKE_OSX_SYSROOT} -arch armv7") + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0) + ELSEIF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + SET(OPENBLAS_CC "${OPENBLAS_CC} -isysroot ${CMAKE_OSX_SYSROOT} -arch arm64") + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0) ENDIF() - SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=${TARGET} ARM_SOFTFP_ABI=1 USE_THREAD=0) ELSEIF(RPI) # use hardfp SET(OPENBLAS_COMMIT "v0.2.19") - SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 USE_THREAD=0) + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 USE_THREAD=0) ENDIF() ELSE() SET(OPENBLAS_COMMIT "v0.2.19") @@ -51,6 +60,8 @@ IF(NOT ${CBLAS_FOUND}) ENDIF() ENDIF() + SET(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs) + ExternalProject_Add( extern_openblas ${EXTERNAL_PROJECT_LOG_ARGS} diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index 45ca5542b7..2fadea9c6c 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -27,6 +27,7 @@ ENDIF(WIN32) INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) + ExternalProject_Add( zlib ${EXTERNAL_PROJECT_LOG_ARGS} @@ -34,15 +35,16 @@ ExternalProject_Add( GIT_TAG "v1.2.8" PREFIX ${ZLIB_SOURCES_DIR} UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR} - CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF - CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DCMAKE_MACOSX_RPATH=ON - CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR} + -DBUILD_SHARED_LIBS=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_MACOSX_RPATH=ON + -DCMAKE_BUILD_TYPE=Release + 
${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_BUILD_TYPE:STRING=Release diff --git a/ios_run.sh b/ios_run.sh deleted file mode 100644 index b8325f15fc..0000000000 --- a/ios_run.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -set -xe - -mkdir -p ./ios_build -cd ./ios_build - -cmake -DCMAKE_SYSTEM_NAME=Darwin \ - -DWITH_C_API=ON \ - -DWITH_TESTING=OFF \ - -DWITH_SWIG_PY=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=/Users/xingzhaolong/cross_compile/ios \ - .. - # -DIOS_PLATFORM=SIMULATOR \ - #-DCMAKE_Go_COMPILER=/usr/local/bin \ - From d57ffc4557620b52d3c0a0e05cbea07f6c5efc31 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Wed, 30 Aug 2017 18:49:32 +0800 Subject: [PATCH 06/22] Deliver the cross-compilng platform-specific args to external libraries. --- CMakeLists.txt | 2 +- cmake/cross_compiling/ios.cmake | 7 ++--- cmake/external/gflags.cmake | 15 ++++++----- cmake/external/glog.cmake | 21 ++++++++------- cmake/external/openblas.cmake | 9 ++++--- cmake/external/protobuf.cmake | 3 ++- cmake/external/warpctc.cmake | 45 ++++++++++++------------------- cmake/external/zlib.cmake | 1 - paddle/gserver/layers/CostLayer.h | 4 ++- 9 files changed, 50 insertions(+), 57 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b343c62de5..a5971ddd95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,7 +81,7 @@ if(ANDROID OR IOS) "Disable MKLDNN when cross-compiling for Android and iOS" FORCE) set(WITH_MKLML OFF CACHE STRING "Disable MKLML package when cross-compiling for Android and iOS" FORCE) -endif(ANDROID) +endif() set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index b15dcec9bc..d805423cb1 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -68,10 +68,6 @@ endif() # Required as of cmake 2.8.10 set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE) -set(CMAKE_AR ar CACHE FILEPATH "" FORCE) -set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE) -set(PKG_CONFIG_EXECUTABLE pkg-config CACHE FILEPATH "" FORCE) - # Setup iOS platform unless specified manually with IOS_PLATFORM if(NOT DEFINED IOS_PLATFORM) set(IOS_PLATFORM "OS") @@ -81,7 +77,8 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") # Set the architecture for iOS if(NOT DEFINED IOS_ARCH) if(IOS_PLATFORM STREQUAL "OS") - set(IOS_ARCH "armv7;armv7s;arm64") + # FIXME: support "armv7;armv7s;arm64" future + set(IOS_ARCH "arm64") elseif(IOS_PLATFORM STREQUAL "SIMULATOR") set(IOS_ARCH "i386;x86_64") elseif(IOS_PLATFORM STREQUAL "WATCHOS") diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index 16e5bef4cd..17b8f2e2a1 100644 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -39,13 +39,14 @@ ExternalProject_Add( PREFIX ${GFLAGS_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} - CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DBUILD_TESTING=OFF - CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + 
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=Release + ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_BUILD_TYPE:STRING=Release diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index 8a594a825a..78415b5a6d 100644 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -34,16 +34,17 @@ ExternalProject_Add( PREFIX ${GLOG_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} - CMAKE_ARGS -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib - CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DWITH_GFLAGS=ON - CMAKE_ARGS -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags - CMAKE_ARGS -DBUILD_TESTING=OFF - CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DWITH_GFLAGS=ON + -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=Release + ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 025eb62a4a..849956f490 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -28,6 +28,8 @@ IF(NOT ${CBLAS_FOUND}) SET(OPENBLAS_CC "${CMAKE_C_COMPILER}") IF(CMAKE_CROSSCOMPILING) SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER}) + GET_FILENAME_COMPONENT(CROSS_SUFFIX ${CMAKE_C_COMPILER} DIRECTORY) + SET(CROSS_SUFFIX ${CROSS_SUFFIX}/) IF(ANDROID) # arm_soft_fp_abi branch of OpenBLAS to support softfp # https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi @@ -40,12 +42,13 @@ IF(NOT ${CBLAS_FOUND}) ELSEIF(IOS) # FIXME: support multiple architectures SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") + SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") IF(CMAKE_OSX_ARCHITECTURES MATCHES "armv7") - SET(OPENBLAS_CC "${OPENBLAS_CC} -isysroot ${CMAKE_OSX_SYSROOT} -arch armv7") + SET(OPENBLAS_CC "${OPENBLAS_CC} -arch armv7") SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0) ELSEIF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") - SET(OPENBLAS_CC "${OPENBLAS_CC} -isysroot ${CMAKE_OSX_SYSROOT} -arch arm64") - SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0) + SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64") + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX}) ENDIF() ELSEIF(RPI) # use hardfp diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index e629d61585..d4b07d3cf6 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -173,7 +173,8 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" "-Dprotobuf_WITH_ZLIB=ON" - "-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}") + 
"-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}" + ${EXTERNAL_OPTIONAL_ARGS}) SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}") ENDIF() diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 2d7daed9bc..1327c6b95d 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -16,25 +16,13 @@ INCLUDE(ExternalProject) SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc) SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc) -SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE) -INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) - -SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib" CACHE PATH "Warp-ctc Library Directory" FORCE) +SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" + CACHE PATH "Warp-ctc Directory" FORCE) +SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}" + CACHE FILEPATH "Warp-ctc Library" FORCE) -IF(WIN32) - SET(WARPCTC_LIBRARIES - "${WARPCTC_INSTALL_DIR}/lib/warpctc.dll" CACHE FILEPATH "Warp-ctc Library" FORCE) -ELSE(WIN32) - IF(APPLE) - SET(_warpctc_SHARED_SUFFIX dylib) - ELSE(APPLE) - SET(_warpctc_SHARED_SUFFIX so) - ENDIF(APPLE) - - SET(WARPCTC_LIBRARIES - "${WARPCTC_INSTALL_DIR}/lib/libwarpctc.${_warpctc_SHARED_SUFFIX}" CACHE FILEPATH "Warp-ctc Library" FORCE) -ENDIF(WIN32) +INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" ) SET(USE_OMP OFF) @@ -49,17 +37,18 @@ ExternalProject_Add( PREFIX ${WARPCTC_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} - CMAKE_ARGS -DWITH_GPU=${WITH_GPU} - CMAKE_ARGS -DWITH_OMP=${USE_OMP} - CMAKE_ARGS -DWITH_TORCH=OFF - CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON - CMAKE_ARGS -DBUILD_SHARED=ON - CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} + -DWITH_GPU=${WITH_GPU} + -DWITH_OMP=${USE_OMP} + -DWITH_TORCH=OFF + -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON + -DBUILD_SHARED=ON + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=Release + ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index 2fadea9c6c..0e61730e1b 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -27,7 +27,6 @@ ENDIF(WIN32) INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) - ExternalProject_Add( zlib ${EXTERNAL_PROJECT_LOG_ARGS} diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index 0ce72ef40a..0f655b48ee 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -318,7 +318,9 @@ public: void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {} + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override {} }; /** From aeea8ab1c4e102ac687da3598011767d7b7a7321 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Wed, 30 Aug 2017 19:59:01 +0800 Subject: 
[PATCH 07/22] Remove the linking of train-related libraries when cross-compiling for Android and iOS. Recover the mistakenly deleted WARPCTC variable in cmake. --- cmake/cross_compiling/ios.cmake | 2 +- cmake/external/gtest.cmake | 19 ++++++++++--------- cmake/external/openblas.cmake | 2 +- cmake/external/warpctc.cmake | 8 ++++++-- cmake/util.cmake | 13 +++++++++---- paddle/capi/CMakeLists.txt | 6 +++--- 6 files changed, 30 insertions(+), 20 deletions(-) diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index d805423cb1..eea17436bd 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -77,7 +77,7 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") # Set the architecture for iOS if(NOT DEFINED IOS_ARCH) if(IOS_PLATFORM STREQUAL "OS") - # FIXME: support "armv7;armv7s;arm64" future + # FIXME(liuyiqun): support "armv7;armv7s;arm64" future set(IOS_ARCH "arm64") elseif(IOS_PLATFORM STREQUAL "SIMULATOR") set(IOS_ARCH "i386;x86_64") diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake index e3970073a1..6a2a79b763 100644 --- a/cmake/external/gtest.cmake +++ b/cmake/external/gtest.cmake @@ -48,15 +48,16 @@ IF(WITH_TESTING) PREFIX ${GTEST_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} - CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DBUILD_GMOCK=ON - CMAKE_ARGS -Dgtest_disable_pthreads=ON - CMAKE_ARGS -Dgtest_force_shared_crt=ON - CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_GMOCK=ON + -Dgtest_disable_pthreads=ON + -Dgtest_force_shared_crt=ON + -DCMAKE_BUILD_TYPE=Release + ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_BUILD_TYPE:STRING=Release diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 849956f490..66c2a8bd80 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -40,7 +40,7 @@ IF(NOT ${CBLAS_FOUND}) SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0) ENDIF() ELSEIF(IOS) - # FIXME: support multiple architectures + # FIXME(liuyiqun): support multiple architectures SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") IF(CMAKE_OSX_ARCHITECTURES MATCHES "armv7") diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 1327c6b95d..bb258c7b55 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -19,11 +19,12 @@ SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc) SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE) +# Used in unit test test_WarpCTCLayer +SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib" + CACHE PATH "Warp-ctc Library Directory" FORCE) SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}" CACHE FILEPATH "Warp-ctc Library" FORCE) -INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) - IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID 
STREQUAL "AppleClang" ) SET(USE_OMP OFF) ELSE() @@ -54,6 +55,9 @@ ExternalProject_Add( -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} ) +MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}") +INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) + ADD_LIBRARY(warpctc STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES}) ADD_DEPENDENCIES(warpctc extern_warpctc) diff --git a/cmake/util.cmake b/cmake/util.cmake index 0da4969d31..bfe269ea25 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -71,20 +71,25 @@ function(link_paddle_exe TARGET_NAME) generate_rdma_links() endif() + if(NOT ANDROID AND NOT IOS) + set(PADDLE_TRAIN_LIBS + paddle_pserver + paddle_network + paddle_trainer_lib + paddle_optimizer) + endif() + target_circle_link_libraries(${TARGET_NAME} ARCHIVE_START paddle_gserver paddle_function ARCHIVE_END - paddle_pserver - paddle_trainer_lib - paddle_network paddle_math paddle_utils paddle_parameter paddle_proto paddle_cuda - paddle_optimizer + ${PADDLE_TRAIN_LIBS} ${EXTERNAL_LIBS} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS} diff --git a/paddle/capi/CMakeLists.txt b/paddle/capi/CMakeLists.txt index 7a83508f31..071f5a0b0d 100644 --- a/paddle/capi/CMakeLists.txt +++ b/paddle/capi/CMakeLists.txt @@ -30,7 +30,7 @@ add_dependencies(paddle_capi paddle_proto) # combine all paddle static libraries together, into libpaddle_capi_whole.a # user should use PaddleCAPI as -lpaddle_capi_whole -set(PADDLE_INFER_LIBS +set(PADDLE_CAPI_INFER_LIBS paddle_utils paddle_parameter paddle_math @@ -40,9 +40,9 @@ set(PADDLE_INFER_LIBS paddle_proto ) -set(PADDLE_TRAIN_LIBS paddle_pserver paddle_network) +set(PADDLE_CAPI_TRAIN_LIBS paddle_pserver paddle_network) -cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_INFER_LIBS}) +cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_CAPI_INFER_LIBS}) # No shared library for iOS if(NOT IOS) From fb93a8be27642f1761262778afea09353fa5c71d Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 31 Aug 2017 17:57:05 +0800 Subject: [PATCH 08/22] Add paddle_pserver back to c-api library, because it is used in Evaluator.h. --- CMakeLists.txt | 8 +++----- cmake/cross_compiling/ios.cmake | 2 -- cmake/util.cmake | 13 ++++--------- paddle/CMakeLists.txt | 10 +++------- paddle/capi/CMakeLists.txt | 5 ++--- 5 files changed, 12 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a5971ddd95..ba1febe5e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,11 +155,9 @@ endif(USE_NNPACK) add_subdirectory(proto) -if(NOT ANDROID AND NOT IOS) - # "add_subdirectory(go)" should be placed after the following loine, - # because it depends on paddle/optimizer. - add_subdirectory(paddle/optimizer) -endif() +# "add_subdirectory(go)" should be placed after the following loine, +# because it depends on paddle/optimizer. +add_subdirectory(paddle/optimizer) # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be # placed after this block, because they depends on it. diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index eea17436bd..4b2a18bcc6 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -49,8 +49,6 @@ if(NOT IOS) endif() set(CMAKE_SYSTEM_NAME Darwin) -#set(UNIX ON) -#set(APPLE ON) # Get the Xcode version being used. 
execute_process(COMMAND xcodebuild -version diff --git a/cmake/util.cmake b/cmake/util.cmake index bfe269ea25..0da4969d31 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -71,25 +71,20 @@ function(link_paddle_exe TARGET_NAME) generate_rdma_links() endif() - if(NOT ANDROID AND NOT IOS) - set(PADDLE_TRAIN_LIBS - paddle_pserver - paddle_network - paddle_trainer_lib - paddle_optimizer) - endif() - target_circle_link_libraries(${TARGET_NAME} ARCHIVE_START paddle_gserver paddle_function ARCHIVE_END + paddle_pserver + paddle_trainer_lib + paddle_network paddle_math paddle_utils paddle_parameter paddle_proto paddle_cuda - ${PADDLE_TRAIN_LIBS} + paddle_optimizer ${EXTERNAL_LIBS} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS} diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index cee47bb8ac..b435de80a2 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -5,12 +5,8 @@ add_subdirectory(testing) add_subdirectory(math) add_subdirectory(parameter) add_subdirectory(gserver) - -if(NOT ANDROID AND NOT IOS) - add_subdirectory(pserver) - add_subdirectory(trainer) -endif() - +add_subdirectory(pserver) +add_subdirectory(trainer) add_subdirectory(scripts) add_subdirectory(string) @@ -23,7 +19,7 @@ if(Boost_FOUND) endif() if(WITH_C_API) - add_subdirectory(capi) + add_subdirectory(capi) endif() if(WITH_SWIG_PY) diff --git a/paddle/capi/CMakeLists.txt b/paddle/capi/CMakeLists.txt index 071f5a0b0d..dca3b887e1 100644 --- a/paddle/capi/CMakeLists.txt +++ b/paddle/capi/CMakeLists.txt @@ -38,9 +38,8 @@ set(PADDLE_CAPI_INFER_LIBS paddle_function paddle_gserver paddle_proto - ) - -set(PADDLE_CAPI_TRAIN_LIBS paddle_pserver paddle_network) + paddle_pserver + paddle_network) cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_CAPI_INFER_LIBS}) From 7976876993ca021741211d3a4c9256c6cd2f625a Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 4 Sep 2017 18:42:33 +0800 Subject: [PATCH 09/22] Add the finding of iOS's native vecLib.framework. 
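
A quick way to see this detection working is to configure an iOS build and watch for the status messages this patch adds to cmake/cblas.cmake. The sketch below is illustrative only: the configure flags mirror the build_ios.sh script added later in this series, and the build directory name and grep pattern are assumptions rather than part of this commit.

#!/bin/bash
# Hypothetical smoke test for the vecLib detection (not part of this patch).
set -e

mkdir -p build_ios && cd build_ios

# Configure flags follow paddle/scripts/travis/build_ios.sh from this series.
cmake -DCMAKE_SYSTEM_NAME=iOS \
      -DIOS_PLATFORM=OS \
      -DWITH_C_API=ON \
      -DWITH_TESTING=OFF \
      -DWITH_SWIG_PY=OFF \
      -DWITH_STYLE_CHECK=OFF \
      .. 2>&1 | tee configure.log

# cblas.cmake now reports which framework provides the CBLAS symbols.
grep -E "Found (standalone vecLib|vecLib as part of Accelerate)" configure.log \
  || echo "vecLib not found; the existing CBLAS lookup is used instead"

If neither message appears, CBLAS_FOUND is left as it was, and OpenBLAS is still built as an external project exactly as before this patch.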
--- cmake/cblas.cmake | 27 +++++++++++++++++++++++++++ cmake/cross_compiling/ios.cmake | 6 ++++-- cmake/util.cmake | 4 +++- paddle/math/MathFunctions.h | 2 +- 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 854066fd1d..bac152bedf 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -171,3 +171,30 @@ if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY) add_definitions(-DPADDLE_USE_REFERENCE_CBLAS) message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") endif() + +if(IOS) + # Find vecLib for iOS + set(VECLIB_SEARCH_DIRS + ${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks + ${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Frameworks + ) + find_path(VECLIB_INC_DIR vecLib.h PATHS ${VECLIB_SEARCH_DIRS}/vecLib.framework/Headers) + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(vecLib DEFAULT_MSG VECLIB_INC_DIR) + + if(VECLIB_FOUND) + set(CBLAS_FOUND ON) + set(CBLAS_PROVIDER vecLib) + set(CBLAS_INC_DIR ${VECLIB_INC_DIR}) + if(VECLIB_INC_DIR MATCHES "^/System/Library/Frameworks/vecLib.framework.*") + set(CBLAS_LIBRARIES -lcblas "-framework vecLib") + message(STATUS "Found standalone vecLib.framework") + else() + set(CBLAS_LIBRARIES -lcblas "-framework Accelerate") + message(STATUS "Found vecLib as part of Accelerate.framework") + endif() + + add_definitions(-DPADDLE_USE_VECLIB) + endif() +endif() diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index 4b2a18bcc6..537a7728c1 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -249,8 +249,10 @@ set(CMAKE_C_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_fi set(CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) -set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") -set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") +if(NOT IOS_ENABLE_BITCODE) + set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") + set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") +endif() set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a") diff --git a/cmake/util.cmake b/cmake/util.cmake index 0da4969d31..e814cad36f 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -25,7 +25,9 @@ function(target_circle_link_libraries TARGET_NAME) endif() endforeach() if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") - list(APPEND LIBS "-undefined dynamic_lookup") + if(IOS AND NOT IOS_ENABLE_BITCODE) + list(APPEND LIBS "-undefined dynamic_lookup") + endif() endif() list(REVERSE libsInArgn) target_link_libraries(${TARGET_NAME} diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h index 637643838f..410851120b 100644 --- a/paddle/math/MathFunctions.h +++ b/paddle/math/MathFunctions.h @@ -26,7 +26,7 @@ limitations under the License. 
*/ #include #endif -#ifdef PADDLE_USE_ATLAS +#if defined(PADDLE_USE_ATLAS) || defined(PADDLE_USE_VECLIB) extern "C" { #include #include From 2ee8a4c84b26b1821ecb9a04f3db44df9775482d Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 4 Sep 2017 19:08:33 +0800 Subject: [PATCH 10/22] Add build_ios task in travis, and move the installing of glide into check_style.sh. --- .travis.yml | 11 +++++++---- paddle/scripts/travis/build_ios.sh | 17 +++++++++++++++++ paddle/scripts/travis/check_style.sh | 6 ++++++ 3 files changed, 30 insertions(+), 4 deletions(-) create mode 100755 paddle/scripts/travis/build_ios.sh diff --git a/.travis.yml b/.travis.yml index b4b83fcdbc..d8f3c0be9b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,7 @@ cache: - $HOME/.cache/pip - $TRAVIS_BUILD_DIR/build/third_party - $TRAVIS_BUILD_DIR/build_android/third_party + - $TRAVIS_BUILD_DIR/build_ios/third_party sudo: required dist: trusty os: @@ -13,6 +14,12 @@ env: - JOB=build_doc - JOB=check_style - JOB=build_android +matrix: + include: + - env: JOB=build_ios + os: osx + osx_image: xcode8.3 + compiler: clang addons: apt: packages: @@ -39,10 +46,6 @@ before_install: # protobuf version. - pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt - pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker - - curl https://glide.sh/get | bash - - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - - go get -u github.com/alecthomas/gometalinter - - gometalinter --install - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: diff --git a/paddle/scripts/travis/build_ios.sh b/paddle/scripts/travis/build_ios.sh new file mode 100755 index 0000000000..d303804252 --- /dev/null +++ b/paddle/scripts/travis/build_ios.sh @@ -0,0 +1,17 @@ +#!/bin/bash +set -e + +# Create the build directory for CMake. +mkdir -p $TRAVIS_BUILD_DIR/build_ios +cd $TRAVIS_BUILD_DIR/build_ios + +# Compile paddle binaries +cmake -DCMAKE_SYSTEM_NAME=iOS \ + -DIOS_PLATFORM=OS \ + -DWITH_C_API=ON \ + -DWITH_TESTING=OFF \ + -DWITH_SWIG_PY=OFF \ + -DWITH_STYLE_CHECK=OFF \ + .. + +make -j `nproc` diff --git a/paddle/scripts/travis/check_style.sh b/paddle/scripts/travis/check_style.sh index ec499a839a..cb483b0ffc 100755 --- a/paddle/scripts/travis/check_style.sh +++ b/paddle/scripts/travis/check_style.sh @@ -8,6 +8,12 @@ function abort(){ trap 'abort' 0 set -e +# install glide +curl https://glide.sh/get | bash +eval "$(GIMME_GO_VERSION=1.8.3 gimme)" +go get -u github.com/alecthomas/gometalinter +gometalinter --install + cd $TRAVIS_BUILD_DIR export PATH=/usr/bin:$PATH pre-commit install From 9e524fce1bf7f81825165d1fff29764cf6aaebc5 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Wed, 13 Sep 2017 15:59:56 +0800 Subject: [PATCH 11/22] Remove the generation of executable, such as paddle_trainer, when setting WITH_C_API. --- .travis.yml | 4 +-- cmake/cross_compiling/ios.cmake | 3 ++ paddle/function/neon/NeonDepthwiseConv.cpp | 2 +- paddle/function/neon/NeonDepthwiseConv.h | 2 +- paddle/gserver/layers/SequenceSliceLayer.cpp | 9 +++--- paddle/pserver/CMakeLists.txt | 20 +++++++------ paddle/scripts/travis/build_ios.sh | 5 +++- paddle/trainer/CMakeLists.txt | 30 +++++++++++--------- 8 files changed, 44 insertions(+), 31 deletions(-) diff --git a/.travis.yml b/.travis.yml index 83f0f45ad1..0a8edb42b4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,8 +41,8 @@ before_install: - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi # Paddle is using protobuf 3.1 currently. 
Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. - - pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt - - pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker + - sudo pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt + - sudo pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index 537a7728c1..025928c21b 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -252,6 +252,9 @@ set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) if(NOT IOS_ENABLE_BITCODE) set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") +else() + set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib") + set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle") endif() set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index 18126152ea..38aa667061 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -52,7 +52,7 @@ public: int outputHeight = output[2]; int outputWidth = output[3]; int filterMultiplier = outputChannels / groups_; - CHECK_EQ(inputChannels, groups_); + CHECK_EQ(static_cast(inputChannels), groups_); // only support strideH() == strideW() and filterHeight == filterWidth. CHECK_EQ(strideH(), strideW()); diff --git a/paddle/function/neon/NeonDepthwiseConv.h b/paddle/function/neon/NeonDepthwiseConv.h index aefeea78ba..33722d3cac 100644 --- a/paddle/function/neon/NeonDepthwiseConv.h +++ b/paddle/function/neon/NeonDepthwiseConv.h @@ -594,7 +594,7 @@ struct StridePadding { float32x4_t s1 = vdupq_n_f32(0.f); for (int s = 0; s < step; s++) { float32x4_t s0 = vld1q_f32(input); - float32x4x2_t v = {s0, s1}; + float32x4x2_t v = {{s0, s1}}; vst2q_f32(inputPadding, v); input += 4; inputPadding += 8; diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/gserver/layers/SequenceSliceLayer.cpp index d3a83fad27..ce68ca4494 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/gserver/layers/SequenceSliceLayer.cpp @@ -73,9 +73,10 @@ void SequenceSliceLayer::checkInputs() { CHECK(inputSeq.hasSeq()) << "The first input of sequence slice layer " << "must be a sequence."; const MatrixPtr indices1 = getInputValue(1); - CHECK_EQ(static_cast(indices1->getHeight()), - inputSeq.hasSubseq() ? inputSeq.getNumSubSequences() - : inputSeq.getNumSequences()) + CHECK_EQ( + indices1->getHeight(), + static_cast(inputSeq.hasSubseq() ? inputSeq.getNumSubSequences() + : inputSeq.getNumSequences())) << "Height of the second input should be equal to number of sequence " << "in the first input."; if (inputLayers_.size() == 3) { @@ -151,7 +152,7 @@ void SequenceSliceLayer::calSelectedRows(const MatrixPtr starts, if (ends) endPos = inputSeqInfoVec_[i][j] + ends->getElement(rowIdx, k); int seqLen = endPos - begPos + 1; - CHECK_GT(seqLen, 0U); + CHECK_GT(seqLen, 0); for (int m = begPos; m <= endPos; ++m) selectedRows_.push_back(m); hasSubseq ? 
outSubSeqStartPos_.push_back(outSubSeqStartPos_.back() + seqLen) diff --git a/paddle/pserver/CMakeLists.txt b/paddle/pserver/CMakeLists.txt index 2245c7d88c..ccfc0e7602 100644 --- a/paddle/pserver/CMakeLists.txt +++ b/paddle/pserver/CMakeLists.txt @@ -45,14 +45,18 @@ add_dependencies(paddle_pserver paddle_proto ${external_project_dependencies}) set(PSERVER_MAIN_SOURCES ParameterServer2Main.cpp) -add_executable(paddle_pserver_main - ${PSERVER_MAIN_SOURCES}) -link_paddle_exe(paddle_pserver_main) if(WITH_TESTING) add_subdirectory(test) endif() -install(TARGETS paddle_pserver_main - RUNTIME DESTINATION opt/paddle/bin - PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ - GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ) -set_target_properties(paddle_pserver_main PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) + +if(NOT WITH_C_API) + add_executable(paddle_pserver_main ${PSERVER_MAIN_SOURCES}) + link_paddle_exe(paddle_pserver_main) + + install(TARGETS paddle_pserver_main + RUNTIME DESTINATION opt/paddle/bin + PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ + GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ) + + set_target_properties(paddle_pserver_main PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) +endif() diff --git a/paddle/scripts/travis/build_ios.sh b/paddle/scripts/travis/build_ios.sh index d303804252..3568b5ebcb 100755 --- a/paddle/scripts/travis/build_ios.sh +++ b/paddle/scripts/travis/build_ios.sh @@ -8,10 +8,13 @@ cd $TRAVIS_BUILD_DIR/build_ios # Compile paddle binaries cmake -DCMAKE_SYSTEM_NAME=iOS \ -DIOS_PLATFORM=OS \ + -DCMAKE_OSX_ARCHITECTURES="arm64" \ -DWITH_C_API=ON \ + -DUSE_EIGEN_FOR_BLAS=ON \ -DWITH_TESTING=OFF \ -DWITH_SWIG_PY=OFF \ -DWITH_STYLE_CHECK=OFF \ + -DCMAKE_BUILD_TYPE=Release \ .. -make -j `nproc` +make diff --git a/paddle/trainer/CMakeLists.txt b/paddle/trainer/CMakeLists.txt index eac0584d30..3d471a0c01 100644 --- a/paddle/trainer/CMakeLists.txt +++ b/paddle/trainer/CMakeLists.txt @@ -50,22 +50,22 @@ macro(add_paddle_exe TARGET_NAME) link_paddle_exe(${TARGET_NAME}) endmacro() -add_paddle_exe(paddle_trainer - TrainerMain.cpp) - -add_paddle_exe(paddle_merge_model - MergeModel.cpp) - if(WITH_TESTING) - add_subdirectory(tests) + add_subdirectory(tests) endif() -install(TARGETS paddle_trainer paddle_merge_model - RUNTIME DESTINATION opt/paddle/bin - PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ - GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ) -set_target_properties(paddle_trainer PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) -set_target_properties(paddle_merge_model PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) +if(NOT WITH_C_API) + add_paddle_exe(paddle_trainer TrainerMain.cpp) + add_paddle_exe(paddle_merge_model MergeModel.cpp) + + install(TARGETS paddle_trainer paddle_merge_model + RUNTIME DESTINATION opt/paddle/bin + PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ + GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ) + + set_target_properties(paddle_trainer PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) + set_target_properties(paddle_merge_model PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) +endif() if(APPLE) set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") @@ -73,6 +73,8 @@ endif() if(WITH_GOLANG) add_dependencies(paddle_trainer_lib paddle_pserver_cclient) - target_link_libraries(paddle_trainer paddle_pserver_cclient) target_link_libraries(paddle_trainer_lib paddle_pserver_cclient) + if(NOT WITH_C_API) + target_link_libraries(paddle_trainer paddle_pserver_cclient) + endif() endif(WITH_GOLANG) From 6d60352e7e5d4a01a61de395fc87438cf814b5c7 
Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Wed, 13 Sep 2017 22:28:29 +0800 Subject: [PATCH 12/22] Add soft-label support for cross-entropy operator. --- paddle/operators/cross_entropy_op.cc | 64 ++++++---- paddle/operators/cross_entropy_op.cu | 119 ++++++++++++------ paddle/operators/cross_entropy_op.h | 92 +++++++++----- paddle/pybind/pybind.cc | 2 +- .../framework/tests/test_cross_entropy_op.py | 25 +++- .../paddle/v2/framework/tests/test_mnist.py | 2 +- 6 files changed, 205 insertions(+), 99 deletions(-) diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index ab1e1c101a..32ad0e82fa 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -17,48 +17,62 @@ limitations under the License. */ namespace paddle { namespace operators { -class OnehotCrossEntropyOp : public framework::OperatorWithKernel { +class CrossEntropyOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto *X = ctx.Input("X"); - auto *label = ctx.Input("label"); + auto *x = ctx.Input("X"); + auto *label = ctx.Input("Label"); - PADDLE_ENFORCE_EQ(X->dims().size(), 2, "X's dimension must be 2."); - PADDLE_ENFORCE_EQ(label->dims().size(), 1, "label's dimension must be 1."); - PADDLE_ENFORCE_EQ(X->dims()[0], label->dims()[0]); - ctx.Output("Y")->Resize({X->dims()[0]}); + PADDLE_ENFORCE_EQ(x->dims().size(), 2, "X's rank must be 2."); + PADDLE_ASSERT(label->dims().size() == 1 || label->dims().size() == 2); + if (label->dims().size() == 2) { + // soft cross entropy + PADDLE_ENFORCE_EQ(x->dims(), label->dims()); + } else { + // normal cross entropy + PADDLE_ENFORCE_EQ(x->dims()[0], label->dims()[0]); + } + ctx.Output("Y")->Resize({x->dims()[0]}); } }; -class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel { +class CrossEntropyGradientOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto dX = ctx.Output(framework::GradVarName("X")); - auto X = ctx.Input("X"); + auto dx = ctx.Output(framework::GradVarName("X")); + auto x = ctx.Input("X"); - dX->Resize(X->dims()); + dx->Resize(x->dims()); } }; -class OnehotCrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { +class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { public: - OnehotCrossEntropyOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) + CrossEntropyOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "The first input of OnehotCrossEntropyOp"); - AddInput("label", "The second input of OnehotCrossEntropyOp"); - AddOutput("Y", "The output of OnehotCrossEntropyOp"); + AddInput("X", "The first input of CrossEntropyOp"); + AddInput("Label", "The second input of CrossEntropyOp"); + AddOutput("Y", "The output of CrossEntropyOp"); AddComment(R"DOC( -OnehotCrossEntropy Operator. +CrossEntropy Operator. 
- Y[i] = -log(X[i][j]) +The second input (Label tensor) supports two kinds of shapes: +1) Rank(Label) = 1, Label[i] indicates the class index for sample i: + Y[i] = -log(X[i, Label[i]]) +2) Rank(Label) = 2, Label[i, j] indicates the soft label of class j + for sample i: + Y[i] = \sum_j{-Label[i, j] * log(X[i, j])} + Please make sure that in this case the summuation of each row of Label + equals one. If each row of Label has only one non-zero element (equals 1), + it degenerates to a standard one-hot representation. )DOC"); } }; @@ -66,10 +80,8 @@ OnehotCrossEntropy Operator. } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(onehot_cross_entropy, ops::OnehotCrossEntropyOp, - ops::OnehotCrossEntropyOpMaker, onehot_cross_entropy_grad, - ops::OnehotCrossEntropyGradientOp); -REGISTER_OP_CPU_KERNEL(onehot_cross_entropy, - ops::OnehotCrossEntropyOpKernel); -REGISTER_OP_CPU_KERNEL(onehot_cross_entropy_grad, - ops::OnehotCrossEntropyGradientOpKernel); +REGISTER_OP(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker, + cross_entropy_grad, ops::CrossEntropyGradientOp); +REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel); +REGISTER_OP_CPU_KERNEL(cross_entropy_grad, + ops::CrossEntropyGradientOpKernel); diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index d999bfce58..1f5e9c1b04 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -21,17 +21,16 @@ namespace operators { using Tensor = framework::Tensor; template -__host__ __device__ T clipping_log(const T x) { +__host__ __device__ T tolerable_value(const T x) { PADDLE_ASSERT(std::is_floating_point::value); const T kApproInf = 1e20; - T v = log(x); - if (v == INFINITY) { + if (x == INFINITY) { return kApproInf; } - if (v == -INFINITY) { + if (x == -INFINITY) { return -kApproInf; } - return v; + return x; } template @@ -42,7 +41,20 @@ __global__ void CrossEntropyKernel(T* Y, const T* X, const int* label, for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) { PADDLE_ASSERT(label[i] >= 0 && label[i] < D); - Y[i] = -clipping_log(X[i * D + label[i]]); + Y[i] = -tolerable_value(log(X[i * D + label[i]])); + } +} + +template +__global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label, + const int N, const int D) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; + i += blockDim.x * gridDim.x) { + T sum = static_cast(0); + for (int j = 0; j < D; j++) { + sum += label[i * D + j] * log(X[i * D + j]); + } + Y[i] = -tolerable_value(sum); } } @@ -69,57 +81,89 @@ __global__ void CrossEntropyGradientKernel(T* dX, const T* dY, const T* X, } template -class OnehotCrossEntropyOpCUDAKernel : public framework::OpKernel { +__global__ void SoftCrossEntropyGradientKernel(T* dX, const T* dY, const T* X, + const T* label, const int N, + const int D) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; + i += blockDim.x * gridDim.x) { + for (int j = 0; j < D; ++j) { + int idx = i * D + j; + dX[idx] = -label[idx] * dY[i] / X[idx]; + } + } +} + +template +class CrossEntropyOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), "It must use GPUPlace."); - auto X = ctx.Input("X"); - const T* Xdata = X->data(); - const int* label_data = ctx.Input("label")->data(); - auto Y = ctx.Output("Y"); - Y->mutable_data(ctx.GetPlace()); - T* Ydata = Y->data(); + auto x = 
ctx.Input("X"); + auto y = ctx.Output("Y"); + auto label = ctx.Input("Label"); + + auto* x_data = x->data(); + y->mutable_data(ctx.GetPlace()); + auto* y_data = y->data(); - int N = X->dims()[0]; - int D = X->dims()[1]; + int n = x->dims()[0]; + int d = x->dims()[1]; int block = 512; - int grid = (N + block - 1) / block; + int grid = (n + block - 1) / block; // TODO(qingqing) launch kernel on specified stream // base on ExecutionContext. - CrossEntropyKernel<<>>(Ydata, Xdata, label_data, N, D); + int label_rank = label->dims().size(); + if (label_rank == 2) { + // soft cross entropy + auto* label_data = ctx.Input("Label")->data(); + SoftCrossEntropyKernel<<>>(y_data, x_data, label_data, n, + d); + } else { + // normal cross entropy + auto* label_data = ctx.Input("Label")->data(); + CrossEntropyKernel<<>>(y_data, x_data, label_data, n, d); + } } }; template -class OnehotCrossEntropyGradientOpCUDAKernel : public framework::OpKernel { +class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), "It must use GPUPlace."); - auto X = ctx.Input("X"); - auto dX = ctx.Output(framework::GradVarName("X")); - auto dY = ctx.Input(framework::GradVarName("Y")); - auto label = ctx.Input("label"); + auto x = ctx.Input("X"); + auto dx = ctx.Output(framework::GradVarName("X")); + auto dy = ctx.Input(framework::GradVarName("Y")); + auto label = ctx.Input("Label"); - auto* dXdata = dX->template mutable_data(ctx.GetPlace()); - auto* dYdata = dY->template data(); - auto* Xdata = X->template data(); - auto* label_data = label->data(); + auto* dx_data = dx->mutable_data(ctx.GetPlace()); + auto* dy_data = dy->data(); + auto* x_data = x->data(); - int N = X->dims()[0]; - int D = X->dims()[1]; + int n = x->dims()[0]; + int d = x->dims()[1]; int block = 512; - int grid = (N * D + block - 1) / block; - zero<<>>(dXdata, N * D); - - grid = (N + block - 1) / block; + int grid = (n * d + block - 1) / block; + zero<<>>(dx_data, n * d); + grid = (n + block - 1) / block; // TODO(qingqing): launch kernel on specified stream // base on ExecutionContext. 
- CrossEntropyGradientKernel<<>>(dXdata, dYdata, Xdata, - label_data, N, D); + int label_rank = label->dims().size(); + if (label_rank == 2) { + // soft cross entropy + auto* label_data = label->data(); + SoftCrossEntropyGradientKernel<<>>( + dx_data, dy_data, x_data, label_data, n, d); + } else { + // normal cross entropy + auto* label_data = label->data(); + CrossEntropyGradientKernel<<>>(dx_data, dy_data, x_data, + label_data, n, d); + } } }; @@ -127,7 +171,6 @@ class OnehotCrossEntropyGradientOpCUDAKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(onehot_cross_entropy, - ops::OnehotCrossEntropyOpCUDAKernel); -REGISTER_OP_GPU_KERNEL(onehot_cross_entropy_grad, - ops::OnehotCrossEntropyGradientOpCUDAKernel); +REGISTER_OP_GPU_KERNEL(cross_entropy, ops::CrossEntropyOpCUDAKernel); +REGISTER_OP_GPU_KERNEL(cross_entropy_grad, + ops::CrossEntropyGradientOpCUDAKernel); diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h index eb4d1348de..9a661cb9cf 100644 --- a/paddle/operators/cross_entropy_op.h +++ b/paddle/operators/cross_entropy_op.h @@ -40,56 +40,86 @@ inline T tolerable_value(const T x) { } template -class OnehotCrossEntropyOpKernel : public framework::OpKernel { +class CrossEntropyOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), "It must use CPUPlace."); - auto X = ctx.Input("X"); - const T* Xdata = X->data(); - const int* label_data = ctx.Input("label")->data(); - auto Y = ctx.Output("Y"); - - Y->mutable_data(ctx.GetPlace()); - - T* Ydata = Y->data(); - - int batch_size = X->dims()[0]; - int class_num = X->dims()[1]; - - for (int i = 0; i < batch_size; ++i) { - int index = i * class_num + label_data[i]; - Ydata[i] = -tolerable_value(std::log(Xdata[index])); + auto x = ctx.Input("X"); + auto y = ctx.Output("Y"); + + auto* x_data = x->data(); + y->mutable_data(ctx.GetPlace()); + auto* y_data = y->data(); + + int batch_size = x->dims()[0]; + int class_num = x->dims()[1]; + int label_rank = ctx.Input("Label")->dims().size(); + + if (label_rank == 2) { + // soft cross entropy + auto* label_data = ctx.Input("Label")->data(); + int index = 0; + for (int i = 0; i < batch_size; ++i) { + T sum = static_cast(0); + for (int j = 0; j < class_num; ++j) { + sum += label_data[index] * std::log(x_data[index]); + y_data[i] = -tolerable_value(sum); + index++; + } + } + } else { + // normal cross entropy + auto* label_data = ctx.Input("Label")->data(); + for (int i = 0; i < batch_size; ++i) { + int index = i * class_num + label_data[i]; + y_data[i] = -tolerable_value(std::log(x_data[index])); + } } } }; template -class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel { +class CrossEntropyGradientOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), "It must use CPUPlace."); - auto X = ctx.Input("X"); - auto dX = ctx.Output(framework::GradVarName("X")); - auto dY = ctx.Input(framework::GradVarName("Y")); - auto label = ctx.Input("label"); + auto x = ctx.Input("X"); + auto dx = ctx.Output(framework::GradVarName("X")); + auto dy = ctx.Input(framework::GradVarName("Y")); + auto label = ctx.Input("Label"); - auto* dXdata = dX->template mutable_data(ctx.GetPlace()); - auto* dYdata = dY->template data(); - auto* Xdata = X->template data(); - auto* label_data 
= label->data(); + auto* dx_data = dx->mutable_data(ctx.GetPlace()); + auto* dy_data = dy->data(); + auto* x_data = x->data(); - const int batch_size = X->dims()[0]; - const int class_num = X->dims()[1]; + int batch_size = x->dims()[0]; + int class_num = x->dims()[1]; + int label_rank = ctx.Input("Label")->dims().size(); // TODO(qingqing): make zero setting an common function. - memset(dXdata, 0, sizeof(T) * batch_size * class_num); - for (int i = 0; i < batch_size; ++i) { - int index = i * class_num + label_data[i]; - dXdata[index] = -tolerable_value(dYdata[i] / Xdata[index]); + if (label_rank == 2) { + // soft cross entropy + auto* label_data = ctx.Input("Label")->data(); + int index = 0; + for (int i = 0; i < batch_size; ++i) { + for (int j = 0; j < class_num; ++j) { + dx_data[index] = -label_data[index] * dy_data[i] / x_data[index]; + index++; + } + } + } else { + // normal cross entropy + auto* label_data = label->data(); + memset(dx_data, 0, sizeof(T) * batch_size * class_num); + for (int i = 0; i < batch_size; ++i) { + PADDLE_ASSERT(label_data[i] >= 0 || label_data[i] < class_num); + int index = i * class_num + label_data[i]; + dx_data[index] = -dy_data[i] / x_data[index]; + } } } }; diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 16a2368aae..13e11fe82a 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -32,7 +32,7 @@ limitations under the License. */ namespace py = pybind11; USE_OP(add); -USE_OP(onehot_cross_entropy); +USE_OP(cross_entropy); USE_OP(sgd); USE_OP(mul); USE_OP(mean); diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py index c2fc102a8b..b845bbc680 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py @@ -5,13 +5,13 @@ from op_test import OpTest class TestCrossEntropy(OpTest): def setUp(self): - self.op_type = "onehot_cross_entropy" + self.op_type = "cross_entropy" batch_size = 30 class_num = 10 X = numpy.random.uniform(0.1, 1.0, [batch_size, class_num]).astype("float32") label = (class_num / 2) * numpy.ones(batch_size).astype("int32") - self.inputs = {'X': X, 'label': label} + self.inputs = {'X': X, 'Label': label} Y = [] for i in range(0, batch_size): Y.append(-numpy.log(X[i][label[i]])) @@ -24,5 +24,26 @@ class TestCrossEntropy(OpTest): self.check_grad(['X'], 'Y') +class TestCrossEntropySoftLabel(OpTest): + def setUp(self): + self.op_type = "cross_entropy" + batch_size = 30 + class_num = 10 + X = numpy.random.uniform(0.1, 1.0, + [batch_size, class_num]).astype("float32") + label = numpy.random.uniform(0.1, 1.0, + [batch_size, class_num]).astype("float32") + label /= label.sum(axis=1, keepdims=True) + self.inputs = {'X': X, 'Label': label} + Y = (-label * numpy.log(X)).sum(axis=1) + self.outputs = {'Y': numpy.array(Y).astype("float32")} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y', max_relative_error=0.05) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_mnist.py b/python/paddle/v2/framework/tests/test_mnist.py index f6f8f49b79..10f2810ad0 100644 --- a/python/paddle/v2/framework/tests/test_mnist.py +++ b/python/paddle/v2/framework/tests/test_mnist.py @@ -128,7 +128,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): def cross_entropy_layer(net, input, label): cost_name = "cross_entropy_%d" % uniq_id() cross_entropy_op = 
Operator(
-        "onehot_cross_entropy", X=input, label=label, Y=cost_name)
+        "cross_entropy", X=input, label=label, Y=cost_name)
     net.append_op(cross_entropy_op)
     scope.new_var(cost_name)
     net.infer_shape(scope)

From e87068290e2f6b714b5b171d8cd4cbfe985bd921 Mon Sep 17 00:00:00 2001
From: Xinghai Sun
Date: Sat, 16 Sep 2017 18:57:13 +0800
Subject: [PATCH 13/22] Update cross entropy operator by following reviewer's comments.

---
 paddle/operators/cross_entropy_op.cc                       | 6 ++++++
 paddle/operators/cross_entropy_op.cu                       | 3 ++-
 python/paddle/v2/framework/tests/test_cross_entropy_op.py  | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc
index c31c132898..61d2104b95 100644
--- a/paddle/operators/cross_entropy_op.cc
+++ b/paddle/operators/cross_entropy_op.cc
@@ -54,6 +54,9 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
 
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
+                            "Input(X) of CrossEntropyOp must not be null.");
+
     auto dx = ctx.Output(framework::GradVarName("X"));
     auto x = ctx.Input("X");
 
@@ -74,11 +77,14 @@ CrossEntropy Operator.
 
 The second input (Label tensor) supports two kinds of shapes:
 1) Rank(Label) = 1, Label[i] indicates the class index for sample i:
+
    Y[i] = -log(X[i, Label[i]])
 2) Rank(Label) = 2, Label[i, j] indicates the soft label of class j
    for sample i:
+
    Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
+
    Please make sure that in this case the summuation of each row of Label
    equals one. If each row of Label has only one non-zero element (equals 1),
    it degenerates to a standard one-hot representation.

diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu
index 1f5e9c1b04..e80dcec8e2 100644
--- a/paddle/operators/cross_entropy_op.cu
+++ b/paddle/operators/cross_entropy_op.cu
@@ -14,6 +14,7 @@
 #include "paddle/framework/op_registry.h"
 #include "paddle/platform/assert.h"
+#include "paddle/platform/hostdevice.h"
 
 namespace paddle {
 namespace operators {
@@ -21,7 +22,7 @@ namespace operators {
 using Tensor = framework::Tensor;
 
 template
-__host__ __device__ T tolerable_value(const T x) {
+HOSTDEVICE T tolerable_value(const T x) {
   PADDLE_ASSERT(std::is_floating_point::value);
   const T kApproInf = 1e20;
   if (x == INFINITY) {

diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
index a630dea7f5..ccff2a386d 100644
--- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py
+++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
@@ -45,7 +45,7 @@ class TestCrossEntropySoftLabel(OpTest):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(['X'], 'Y', max_relative_error=0.05)
+        self.check_grad(['X'], 'Y')
 
 
 if __name__ == "__main__":

From 46ee8ceecbd36f63b4fbfa77814aa0548023e677 Mon Sep 17 00:00:00 2001
From: Liu Yiqun
Date: Fri, 15 Sep 2017 09:44:42 +0800
Subject: [PATCH 14/22] Move the search and link of veclib to iOS's toolchain file.
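
With this change the vecLib lookup is driven by the iOS toolchain file rather than cmake/cblas.cmake, gated by the new IOS_USE_VECLIB_FOR_BLAS option introduced below, which defaults to OFF and therefore has to be switched on explicitly at configure time. A minimal configure sketch with the option enabled follows; the remaining flags mirror build_ios.sh and are illustrative, not part of this commit.

#!/bin/bash
# Illustrative configure run with the toolchain-level vecLib switch turned on.
# IOS_USE_VECLIB_FOR_BLAS defaults to OFF, so it must be passed explicitly.
set -e

mkdir -p build_ios_veclib && cd build_ios_veclib

cmake -DCMAKE_SYSTEM_NAME=iOS \
      -DIOS_PLATFORM=OS \
      -DIOS_USE_VECLIB_FOR_BLAS=ON \
      -DWITH_C_API=ON \
      -DWITH_TESTING=OFF \
      -DWITH_SWIG_PY=OFF \
      -DCMAKE_BUILD_TYPE=Release \
      ..
make

When the option is ON and vecLib is found, ios.cmake appends -lcblas plus the -framework vecLib or -framework Accelerate flags to the link flags, while cblas.cmake only marks vecLib as the CBLAS provider and defines PADDLE_USE_VECLIB.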
--- cmake/cblas.cmake | 30 +++++----------------------- cmake/cross_compiling/ios.cmake | 35 +++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index bac152bedf..8fdc382f0c 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -172,29 +172,9 @@ if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY) message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") endif() -if(IOS) - # Find vecLib for iOS - set(VECLIB_SEARCH_DIRS - ${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks - ${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Frameworks - ) - find_path(VECLIB_INC_DIR vecLib.h PATHS ${VECLIB_SEARCH_DIRS}/vecLib.framework/Headers) - - include(FindPackageHandleStandardArgs) - find_package_handle_standard_args(vecLib DEFAULT_MSG VECLIB_INC_DIR) - - if(VECLIB_FOUND) - set(CBLAS_FOUND ON) - set(CBLAS_PROVIDER vecLib) - set(CBLAS_INC_DIR ${VECLIB_INC_DIR}) - if(VECLIB_INC_DIR MATCHES "^/System/Library/Frameworks/vecLib.framework.*") - set(CBLAS_LIBRARIES -lcblas "-framework vecLib") - message(STATUS "Found standalone vecLib.framework") - else() - set(CBLAS_LIBRARIES -lcblas "-framework Accelerate") - message(STATUS "Found vecLib as part of Accelerate.framework") - endif() - - add_definitions(-DPADDLE_USE_VECLIB) - endif() +if(IOS_USE_VECLIB_FOR_BLAS AND VECLIB_FOUND) + set(CBLAS_FOUND ON) + set(CBLAS_PROVIDER vecLib) + set(CBLAS_INC_DIR ${VECLIB_INC_DIR}) + add_definitions(-DPADDLE_USE_VECLIB) endif() diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index 025928c21b..0b38943952 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -28,6 +28,7 @@ # IOS_DEPLOYMENT_TARGET # The minimum iOS deployment version, such as "7.0" # IOS_ENABLE_BITCODE = ON (default) or OFF +# IOS_USE_VECLIB_FOR_BLAS = OFF (default) or ON # IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder # By default this location is automatcially chosen based on the IOS_PLATFORM value above. 
# If set manually, it will override the default location and force the user of a particular Developer Platform @@ -97,6 +98,11 @@ if(NOT DEFINED IOS_ENABLE_BITCODE) endif() set(IOS_ENABLE_BITCODE ${IOS_ENABLE_BITCODE} CACHE BOOL "Whether to enable bitcode") +if(NOT DEFINED IOS_USE_VECLIB_FOR_BLAS) + set(IOS_USE_VECLIB_FOR_BLAS OFF) +endif() +set(IOS_USE_VECLIB_FOR_BLAS ${IOS_UES_VECLIB_FOR_BLAS} CACHE BOOL "Whether to use veclib") + # Check the platform selection and setup for developer root if(${IOS_PLATFORM} STREQUAL "OS") set(IOS_PLATFORM_LOCATION "iPhoneOS.platform") @@ -245,8 +251,33 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_ set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags") set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") -set(CMAKE_C_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") -set(CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") +set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first") + +if(IOS_USE_VECLIB_FOR_BLAS) + # Find vecLib for iOS + set(VECLIB_SEARCH_DIRS + ${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks + ${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Frameworks + ) + find_path(VECLIB_INC_DIR vecLib.h PATHS ${VECLIB_SEARCH_DIRS}/vecLib.framework/Headers) + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(vecLib DEFAULT_MSG VECLIB_INC_DIR) + + if(VECLIB_FOUND) + if(VECLIB_INC_DIR MATCHES "^/System/Library/Frameworks/vecLib.framework.*") + set(IOS_LINK_FLAGS ${IOS_LINK_FLAGS} -lcblas "-framework vecLib") + message(STATUS "Found standalone vecLib.framework") + else() + set(IOS_LINK_FLAGS ${IOS_LINK_FLAGS} -lcblas "-framework Accelerate") + message(STATUS "Found vecLib as part of Accelerate.framework") + endif() + + endif() +endif() + +set(CMAKE_C_LINK_FLAGS "${IOS_LINK_FLAGS} ${CMAKE_C_LINK_FLAGS}") +set(CMAKE_CXX_LINK_FLAGS "${IOS_LINK_FLAGS} ${CMAKE_CXX_LINK_FLAGS}") set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) if(NOT IOS_ENABLE_BITCODE) From 49d4b39f28458fbc6a071affd66e88f86c08aee8 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 19 Sep 2017 11:07:18 +0800 Subject: [PATCH 15/22] fix typo and remove some unused code --- paddle/gserver/layers/MKLDNNConvLayer.cpp | 5 +---- paddle/gserver/layers/MKLDNNFcLayer.cpp | 4 +--- paddle/gserver/layers/MKLDNNLayer.h | 4 ++++ paddle/gserver/layers/MKLDNNPoolLayer.cpp | 1 - paddle/gserver/tests/test_MKLDNN.cpp | 1 - 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp index 9088744bee..2647cb6006 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -294,12 +294,9 @@ void MKLDNNConvLayer::resetOutValue( std::shared_ptr& pd, MKLDNNMatrixPtr& out) { out = MKLDNNMatrix::create(output_.value, pd->dst_primitive_desc()); - // change original output value from cpu matrix to mkldnn matrix - output_.value = std::dynamic_pointer_cast(out); - // create reorder if output value has cpu device and pd do not match cpuOutVal_ = nullptr; - cpuOutVal_ = nullptr; + cvtOutVal_ = nullptr; if (!outputIsOnlyMKLDNN()) { const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; diff --git 
a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index f60e221a6e..66b358bcea 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -172,12 +172,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt, void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) { out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_); - // change original output value to mkldnn output value - output_.value = std::dynamic_pointer_cast(out); if (!outputIsOnlyMKLDNN()) { // fc cpu output value do not need create convert // just share point - getOutput(CPU_DEVICE).value->setData(output_.value->getData()); + getOutput(CPU_DEVICE).value->setData(out->getData()); } } diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 169679c829..c4e4a6874e 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -119,6 +119,10 @@ public: inputElemenCnt_ = elemenCnt; reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_); + if (outVal_) { + // change original output value to mkldnn output value + output_.value = std::dynamic_pointer_cast(outVal_); + } convertWeightsFromPaddle(); needResetBwd_ = true; } diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp index 48b2f5a4cb..b62dfb7c54 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp +++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp @@ -134,7 +134,6 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) { memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; out = MKLDNNMatrix::create( output_.value, outDims, inVal_->getFormat(), engine_); - output_.value = std::dynamic_pointer_cast(out); // create reorder if output value has cpu device and pd do not match cpuOutVal_ = nullptr; diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index b593f65fe4..7620365efa 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -162,7 +162,6 @@ void testPoolLayer(const testPoolDesc& pm) { 0}); LayerInputConfig* input = cfg.layerConfig.add_inputs(); PoolConfig* pool = input->mutable_pool_conf(); - // pool->set_pool_type(poolType); pool->set_channels(pm.ch); pool->set_img_size(pm.iw); pool->set_img_size_y(pm.ih); From 24f13b1a598fd6356a67a8f9aa339c17098799c3 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 19 Sep 2017 13:33:37 +0800 Subject: [PATCH 16/22] add MKLDNN activation files --- .../gserver/activations/MKLDNNActivation.cpp | 84 ++++++++++ paddle/gserver/activations/MKLDNNActivation.h | 144 ++++++++++++++++++ 2 files changed, 228 insertions(+) create mode 100644 paddle/gserver/activations/MKLDNNActivation.cpp create mode 100644 paddle/gserver/activations/MKLDNNActivation.h diff --git a/paddle/gserver/activations/MKLDNNActivation.cpp b/paddle/gserver/activations/MKLDNNActivation.cpp new file mode 100644 index 0000000000..7fa5a4587c --- /dev/null +++ b/paddle/gserver/activations/MKLDNNActivation.cpp @@ -0,0 +1,84 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNActivation.h" +#include "mkldnn.hpp" +#include "paddle/utils/ClassRegistrar.h" + +namespace paddle { + +static ClassRegistrar gMKLDNNActivationRegistrar; +/** + * @def MKLDNN_ACTIVATION_CLASS_NAME + * @note MKLDNN_ACTIVATION_CLASS_NAME(relu) relu_; + * means mkldnn_reluActivation relu_; + */ +#define MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) mkldnn_##ACT_TYPE##Activation + +/** + * @def DEFINE_MKLDNN_ELTWISE_ACTIVATION + */ +#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA) \ + class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) \ + : public MKLDNNEltwiseActivation { \ + private: \ + static const std::string name; \ + static const float alpha; \ + \ + public: \ + const std::string& getName() const { return name; } \ + float getAlpha() const { return alpha; } \ + }; \ + const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \ + "mkldnn_" #ACT_TYPE; \ + const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \ + static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] { \ + gMKLDNNActivationRegistrar \ + .registerClass( \ + "mkldnn_" #ACT_TYPE); \ + }); + +/** + * @brief MKLDNN Relu Activation. + * Actually mkldnn_relu is Leaky Relu. + * f(x) = x (x >= 0) + * f(x) = negative_slope * x (x < 0) + * @note the negative_slope should be -0.f + */ +DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f) + +/** + * @brief MKLDNN Tanh Activation. + */ +DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f) + +/** + * @brief MKLDNN ELU(Exponential Linear Unit) Activation. + * f(x) = x (x >= 0) + * f(x) = negative_slope * (exp(x) - 1) (x < 0) + */ +DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f) + +ActivationFunction* MKLDNNActivation::create(const std::string& type) { + return gMKLDNNActivationRegistrar.createByType(type); +} + +std::vector MKLDNNActivation::getAllRegisteredTypes() { + std::vector types; + gMKLDNNActivationRegistrar.forEachType( + [&](const std::string& type) { types.push_back(type); }); + return types; +} + +} // namespace paddle diff --git a/paddle/gserver/activations/MKLDNNActivation.h b/paddle/gserver/activations/MKLDNNActivation.h new file mode 100644 index 0000000000..3afab609be --- /dev/null +++ b/paddle/gserver/activations/MKLDNNActivation.h @@ -0,0 +1,144 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "ActivationFunction.h" +#include "mkldnn.hpp" +#include "paddle/gserver/layers/MKLDNNBase.h" +#include "paddle/math/MKLDNNMatrix.h" +#include "paddle/parameter/Argument.h" + +namespace paddle { + +/** + * @brief Base class of MKLDNN Activation. 
+ * Common activation function are provieded, + * including mkldnn_relu, mkldnn_elu, mkldnn_tanh, mkldnn_softmax + */ +class MKLDNNActivation : public ActivationFunction { +protected: + // input value element count + size_t cnt_; + // mkldnn matrix, primitive, stream and pipeline + MKLDNNMatrixPtr val_; + MKLDNNMatrixPtr grad_; + std::shared_ptr stream_; + std::shared_ptr fwd_; + std::shared_ptr bwd_; + std::vector pipelineFwd_; + std::vector pipelineBwd_; + +public: + MKLDNNActivation() : cnt_(0) {} + ~MKLDNNActivation() {} + static ActivationFunction* create(const std::string& type); + static std::vector getAllRegisteredTypes(); + virtual const std::string& getName() const = 0; + virtual Error __must_check forward(Argument& act) = 0; + virtual Error __must_check backward(Argument& act) = 0; +}; + +/** + * @brief Base class of MKLDNN Eltwise Activation, + * includes mkldnn_relu, mkldnn_elu and mkldnn_tanh. + */ +class MKLDNNEltwiseActivation : public MKLDNNActivation { + typedef mkldnn::eltwise_forward eltwise_fwd; + typedef mkldnn::eltwise_backward eltwise_bwd; + +public: + MKLDNNEltwiseActivation() {} + + ~MKLDNNEltwiseActivation() {} + + virtual const std::string& getName() const = 0; + virtual float getAlpha() const = 0; + virtual float getBeta() const { return 0.f; } + + /** + * reshape and reset the forward and backward primitives + */ + void resetPrimitives(Argument& act) { + if (cnt_ == act.value->getElementCnt()) { + return; + } + cnt_ = act.value->getElementCnt(); + stream_.reset(new MKLDNNStream()); + auto eng = CPUEngine::Instance().getEngine(); + + // get algo setting + mkldnn::algorithm algo; + if (this->getName() == "mkldnn_relu") { + algo = mkldnn::algorithm::eltwise_relu; + } else if (this->getName() == "mkldnn_tanh") { + algo = mkldnn::algorithm::eltwise_tanh; + } else if (this->getName() == "mkldnn_elu") { + algo = mkldnn::algorithm::eltwise_elu; + } else { + LOG(FATAL) << "Unkown eltwise activation type: " << this->getName(); + } + // note: alpha represents the NegativeSlope when used in relu. + float alpha = getAlpha(); + float beta = getBeta(); + + /// forward + val_ = std::dynamic_pointer_cast(act.value); + if (val_ == nullptr) { + int bs = act.getBatchSize(); + int ih = act.getFrameHeight() > 0 ? act.getFrameHeight() : 1; + int iw = act.getFrameWidth() > 0 ? 
act.getFrameWidth() : 1; + int ic = cnt_ / bs / ih / iw; + CHECK_EQ(cnt_, (size_t)bs * ic * ih * iw); + val_ = MKLDNNMatrix::create( + act.value, {bs, ic, ih, iw}, mkldnn::memory::format::nchw, eng); + CHECK(val_); + } + auto fwdDesc = eltwise_fwd::desc(mkldnn::prop_kind::forward_training, + algo, + val_->getMemoryDesc(), + alpha, + beta); + auto fwdPD = eltwise_fwd::primitive_desc(fwdDesc, eng); + // inplace buffer, dst = src + fwd_.reset(new eltwise_fwd(fwdPD, *val_, *val_)); + pipelineFwd_.clear(); + pipelineFwd_.push_back(*fwd_); + + /// backward + if (act.grad == nullptr) { + grad_ = nullptr; + return; + } + grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc()); + auto bwdDesc = eltwise_bwd::desc( + algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta); + auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, fwdPD); + bwd_.reset(new eltwise_bwd(bwdPD, *val_, *grad_, *grad_)); + pipelineBwd_.clear(); + pipelineBwd_.push_back(*bwd_); + } + + Error __must_check forward(Argument& act) { + resetPrimitives(act); + stream_->submit(pipelineFwd_); + return Error(); + } + + Error __must_check backward(Argument& act) { + stream_->submit(pipelineBwd_); + return Error(); + } +}; + +} // namespace paddle From d8046da0cd33d6d79ce687623392ec9c73d2001c Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 19 Sep 2017 17:33:16 +0800 Subject: [PATCH 17/22] Use soft_label attribute for cross-entropy. --- paddle/operators/cross_entropy_op.cc | 95 ++++++++++++++----- paddle/operators/cross_entropy_op.cu | 31 ++---- paddle/operators/cross_entropy_op.h | 25 ++--- .../framework/tests/test_cross_entropy_op.py | 73 ++++++++++---- 4 files changed, 138 insertions(+), 86 deletions(-) diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index 61d2104b95..953367eb8b 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -25,25 +25,32 @@ class CrossEntropyOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), - "Input(X) of CrossEntropyOp must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null."); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"), - "Input(Label) of CrossEntropyOp must not be null."); - PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Y"), - "Output(Y) of CrossEntropyOp must not be null."); - - auto *x = ctx.Input("X"); - auto *label = ctx.Input("Label"); - - PADDLE_ENFORCE_EQ(x->dims().size(), 2, "X's rank must be 2."); - PADDLE_ASSERT(label->dims().size() == 1 || label->dims().size() == 2); - if (label->dims().size() == 2) { - // soft cross entropy - PADDLE_ENFORCE_EQ(x->dims(), label->dims()); + "Input(Label) must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Y"), "Output(Y) must not be null."); + + auto x = ctx.Input("X"); + auto label = ctx.Input("Label"); + PADDLE_ENFORCE_EQ(x->dims().size(), 2, "Input(X)'s rank must be 2."); + PADDLE_ENFORCE_EQ(label->dims().size(), 2, + "Input(Label)'s rank must be 2."); + // TODO(xinghai-sun): remove this check after swtiching to bool + PADDLE_ENFORCE(ctx.Attr("soft_label") == 0 || + ctx.Attr("soft_label") == 1); + PADDLE_ENFORCE_EQ(x->dims()[0], label->dims()[0], + "The 1st dimension of Input(X) and Input(Label) must " + "be equal."); + if (ctx.Attr("soft_label") == 1) { + PADDLE_ENFORCE_EQ(x->dims()[1], label->dims()[1], + "If Attr(soft_label) == 1, The 2nd dimension of " + "Input(X) and Input(Label) must 
be equal."); } else { - // normal cross entropy - PADDLE_ENFORCE_EQ(x->dims()[0], label->dims()[0]); + PADDLE_ENFORCE_EQ(label->dims()[1], 1, + "If Attr(soft_label) == 0, The 2nd dimension of " + "Input(Label) must be 1."); } + ctx.Output("Y")->Resize({x->dims()[0], 1}); } }; @@ -54,12 +61,41 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), - "Input(X) of CrossEntropyOp must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"), + "Input(Label) must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")), + "Input(Y@GRAD) must not be null."); - auto dx = ctx.Output(framework::GradVarName("X")); auto x = ctx.Input("X"); + auto label = ctx.Input("Label"); + auto dy = ctx.Input(framework::GradVarName("Y")); + PADDLE_ENFORCE_EQ(x->dims().size(), 2, "Input(X)'s rank must be 2."); + PADDLE_ENFORCE_EQ(dy->dims().size(), 2, "Input(Y@Grad)'s rank must be 2."); + PADDLE_ENFORCE_EQ(label->dims().size(), 2, + "Input(Label)'s rank must be 2."); + // TODO(xinghai-sun): remove this check after swtiching to bool + PADDLE_ENFORCE(ctx.Attr("soft_label") == 0 || + ctx.Attr("soft_label") == 1); + PADDLE_ENFORCE_EQ(x->dims()[0], label->dims()[0], + "The 1st dimension of Input(X) and Input(Label) must " + "be equal."); + PADDLE_ENFORCE_EQ(x->dims()[0], dy->dims()[0], + "The 1st dimension of Input(X) and Input(Y@Grad) must " + "be equal."); + PADDLE_ENFORCE_EQ(dy->dims()[1], 1, + "The 2nd dimension of Input(Y@Grad) must be 1."); + if (ctx.Attr("soft_label") == 1) { + PADDLE_ENFORCE_EQ(x->dims()[1], label->dims()[1], + "If Attr(soft_label) == 1, The 2nd dimension of " + "Input(X) and Input(Label) must be equal."); + } else { + PADDLE_ENFORCE_EQ(label->dims()[1], 1, + "If Attr(soft_label) == 0, The 2nd dimension of " + "Input(Label) must be 1."); + } + auto dx = ctx.Output(framework::GradVarName("X")); dx->Resize(x->dims()); } }; @@ -72,22 +108,31 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "The first input of CrossEntropyOp"); AddInput("Label", "The second input of CrossEntropyOp"); AddOutput("Y", "The output of CrossEntropyOp"); + AddAttr("soft_label", "Is soft label. Default zero.").SetDefault(0); + AddComment(R"DOC( CrossEntropy Operator. -The second input (Label tensor) supports two kinds of shapes: -1) Rank(Label) = 1, Label[i] indicates the class index for sample i: +It supports both standard cross-entropy and soft-label cross-entropy loss +computation. +1) One-hot cross-entropy: + soft_label = 0, Label[i, 0] indicates the class index for sample i: Y[i] = -log(X[i, Label[i]]) -2) Rank(Label) = 2, Label[i, j] indicates the soft label of class j - for sample i: +2) Soft-label cross-entropy: + soft_label = 1, Label[i, j] indicates the soft label of class j + for sample i: Y[i] = \sum_j{-Label[i, j] * log(X[i, j])} Please make sure that in this case the summuation of each row of Label - equals one. If each row of Label has only one non-zero element (equals 1), - it degenerates to a standard one-hot representation. + equals one. + +3) One-hot cross-entropy with vecterized Input(Label): + As a special case of 2), when each row of Input(Label) has only one + non-zero element (equals 1), soft-label cross-entropy degenerates to a + one-hot cross-entropy with one-hot label representation. 
)DOC"); } }; diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index e80dcec8e2..ab6ad0e062 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -13,27 +13,13 @@ limitations under the License. */ #include "paddle/framework/op_registry.h" +#include "paddle/operators/cross_entropy_op.h" #include "paddle/platform/assert.h" #include "paddle/platform/hostdevice.h" namespace paddle { namespace operators { -using Tensor = framework::Tensor; - -template -HOSTDEVICE T tolerable_value(const T x) { - PADDLE_ASSERT(std::is_floating_point::value); - const T kApproInf = 1e20; - if (x == INFINITY) { - return kApproInf; - } - if (x == -INFINITY) { - return -kApproInf; - } - return x; -} - template __global__ void CrossEntropyKernel(T* Y, const T* X, const int* label, const int N, const int D) { @@ -53,9 +39,9 @@ __global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label, i += blockDim.x * gridDim.x) { T sum = static_cast(0); for (int j = 0; j < D; j++) { - sum += label[i * D + j] * log(X[i * D + j]); + sum += label[i * D + j] * tolerable_value(log(X[i * D + j])); } - Y[i] = -tolerable_value(sum); + Y[i] = -sum; } } @@ -85,6 +71,7 @@ template __global__ void SoftCrossEntropyGradientKernel(T* dX, const T* dY, const T* X, const T* label, const int N, const int D) { + // TOOD(qingqing): optimize for this kernel for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) { for (int j = 0; j < D; ++j) { @@ -115,14 +102,11 @@ class CrossEntropyOpCUDAKernel : public framework::OpKernel { int grid = (n + block - 1) / block; // TODO(qingqing) launch kernel on specified stream // base on ExecutionContext. - int label_rank = label->dims().size(); - if (label_rank == 2) { - // soft cross entropy + if (ctx.Attr("soft_label") == 1) { auto* label_data = ctx.Input("Label")->data(); SoftCrossEntropyKernel<<>>(y_data, x_data, label_data, n, d); } else { - // normal cross entropy auto* label_data = ctx.Input("Label")->data(); CrossEntropyKernel<<>>(y_data, x_data, label_data, n, d); } @@ -153,14 +137,11 @@ class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel { grid = (n + block - 1) / block; // TODO(qingqing): launch kernel on specified stream // base on ExecutionContext. - int label_rank = label->dims().size(); - if (label_rank == 2) { - // soft cross entropy + if (ctx.Attr("soft_label") == 1) { auto* label_data = label->data(); SoftCrossEntropyGradientKernel<<>>( dx_data, dy_data, x_data, label_data, n, d); } else { - // normal cross entropy auto* label_data = label->data(); CrossEntropyGradientKernel<<>>(dx_data, dy_data, x_data, label_data, n, d); diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h index 9a661cb9cf..1b4b23ac20 100644 --- a/paddle/operators/cross_entropy_op.h +++ b/paddle/operators/cross_entropy_op.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once #include "paddle/framework/op_registry.h" +#include "paddle/platform/hostdevice.h" namespace paddle { namespace operators { @@ -21,21 +22,15 @@ namespace operators { using Tensor = framework::Tensor; template -inline T tolerable_value(const T x) { - static_assert(std::is_floating_point::value, - "tolerable_value works only on float, " - "double and double double."); - +HOSTDEVICE T tolerable_value(const T x) { + PADDLE_ASSERT(std::is_floating_point::value); const T kApproInf = 1e20; - if (x == INFINITY) { return kApproInf; } - if (x == -INFINITY) { return -kApproInf; } - return x; } @@ -55,22 +50,19 @@ class CrossEntropyOpKernel : public framework::OpKernel { int batch_size = x->dims()[0]; int class_num = x->dims()[1]; - int label_rank = ctx.Input("Label")->dims().size(); - if (label_rank == 2) { - // soft cross entropy + if (ctx.Attr("soft_label") == 1) { auto* label_data = ctx.Input("Label")->data(); int index = 0; for (int i = 0; i < batch_size; ++i) { T sum = static_cast(0); for (int j = 0; j < class_num; ++j) { - sum += label_data[index] * std::log(x_data[index]); - y_data[i] = -tolerable_value(sum); + sum += label_data[index] * tolerable_value(std::log(x_data[index])); + y_data[i] = -sum; index++; } } } else { - // normal cross entropy auto* label_data = ctx.Input("Label")->data(); for (int i = 0; i < batch_size; ++i) { int index = i * class_num + label_data[i]; @@ -98,11 +90,9 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel { int batch_size = x->dims()[0]; int class_num = x->dims()[1]; - int label_rank = ctx.Input("Label")->dims().size(); // TODO(qingqing): make zero setting an common function. - if (label_rank == 2) { - // soft cross entropy + if (ctx.Attr("soft_label") == 1) { auto* label_data = ctx.Input("Label")->data(); int index = 0; for (int i = 0; i < batch_size; ++i) { @@ -112,7 +102,6 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel { } } } else { - // normal cross entropy auto* label_data = label->data(); memset(dx_data, 0, sizeof(T) * batch_size * class_num); for (int i = 0; i < batch_size; ++i) { diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py index ccff2a386d..0206ca064b 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py @@ -1,23 +1,25 @@ import unittest -import numpy +import numpy as np from op_test import OpTest -class TestOnehotCrossEntropyOp(OpTest): +class TestCrossEntropyOp1(OpTest): + """Test standard cross-entropy, with index representation of labels. 
+ """ + def setUp(self): self.op_type = "cross_entropy" batch_size = 30 class_num = 10 - - X = numpy.random.uniform(0.1, 1.0, - [batch_size, class_num]).astype("float32") - labels = numpy.random.randint(0, class_num, batch_size, dtype="int32") - - cross_entropy = numpy.asmatrix( - [[-numpy.log(X[i][labels[i]])] for i in range(X.shape[0])], + X = np.random.uniform(0.1, 1.0, + [batch_size, class_num]).astype("float32") + label = np.random.randint(0, class_num, (batch_size, 1), dtype="int32") + cross_entropy = np.asmatrix( + [[-np.log(X[i][label[i][0]])] for i in range(X.shape[0])], dtype="float32") - self.inputs = {"X": X, "Label": labels} + self.inputs = {"X": X, "Label": label} self.outputs = {"Y": cross_entropy} + self.attrs = {'soft_label': 0} def test_check_output(self): self.check_output() @@ -26,20 +28,55 @@ class TestOnehotCrossEntropyOp(OpTest): self.check_grad(["X"], "Y") -class TestCrossEntropySoftLabel(OpTest): +class TestCrossEntropyOp2(OpTest): + """Test soft-label cross-entropy, with vecterized soft labels. + """ + def setUp(self): self.op_type = "cross_entropy" - batch_size = 30 - class_num = 10 - X = numpy.random.uniform(0.1, 1.0, - [batch_size, class_num]).astype("float32") - label = numpy.random.uniform(0.1, 1.0, - [batch_size, class_num]).astype("float32") + batch_size = 10 + class_num = 5 + X = np.random.uniform(0.1, 1.0, + [batch_size, class_num]).astype("float32") + label = np.random.uniform(0.1, 1.0, + [batch_size, class_num]).astype("float32") label /= label.sum(axis=1, keepdims=True) + cross_entropy = (-label * np.log(X)).sum( + axis=1, keepdims=True).astype("float32") self.inputs = {'X': X, 'Label': label} - cross_entropy = (-label * numpy.log(X)).sum( + self.outputs = {'Y': cross_entropy} + self.attrs = {'soft_label': 1} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y') + + +class TestCrossEntropyOp3(OpTest): + """Test one-hot cross-entropy, with vecterized one-hot representation of + labels. + """ + + def setUp(self): + self.op_type = "cross_entropy" + batch_size = 30 + class_num = 10 + X = np.random.uniform(0.1, 1.0, + [batch_size, class_num]).astype("float32") + label_index = np.random.randint( + 0, class_num, (batch_size), dtype="int32") + label = np.zeros(X.shape) + label[np.arange(batch_size), label_index] = 1 + cross_entropy = np.asmatrix( + [[-np.log(X[i][label_index[i]])] for i in range(X.shape[0])], + dtype="float32") + cross_entropy2 = (-label * np.log(X)).sum( axis=1, keepdims=True).astype("float32") + self.inputs = {'X': X, 'Label': label} self.outputs = {'Y': cross_entropy} + self.attrs = {'soft_label': 1} def test_check_output(self): self.check_output() From 19de8ae1419e327f35855ebbaf13fbdfe10aae58 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 19 Sep 2017 19:18:34 +0800 Subject: [PATCH 18/22] Fixed a error in mnist unitest. 
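The cross_entropy operator now takes its label input under the name Label and, when soft_label is 0, expects index labels with shape (batch_size, 1) rather than a flat vector, so the test has to reshape what the reader yields before feeding it. A minimal sketch of that conversion (the concrete label values below are made up purely for illustration):

    import numpy

    # the reader yields a flat int32 vector of class indices, shape (batch_size,)
    label_data = numpy.array([3, 1, 4, 0], dtype="int32")
    # cross_entropy with soft_label == 0 expects shape (batch_size, 1)
    label_data = numpy.expand_dims(label_data, axis=1)
    assert label_data.shape == (4, 1)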
--- python/paddle/v2/framework/tests/test_mnist.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_mnist.py b/python/paddle/v2/framework/tests/test_mnist.py index 10f2810ad0..66452cb396 100644 --- a/python/paddle/v2/framework/tests/test_mnist.py +++ b/python/paddle/v2/framework/tests/test_mnist.py @@ -128,7 +128,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): def cross_entropy_layer(net, input, label): cost_name = "cross_entropy_%d" % uniq_id() cross_entropy_op = Operator( - "cross_entropy", X=input, label=label, Y=cost_name) + "cross_entropy", X=input, Label=label, Y=cost_name) net.append_op(cross_entropy_op) scope.new_var(cost_name) net.infer_shape(scope) @@ -181,7 +181,7 @@ def error_rate(predict, label): images = data_layer(name="pixel", dims=[BATCH_SIZE, 784]) -labels = data_layer(name="label", dims=[BATCH_SIZE]) +labels = data_layer(name="label", dims=[BATCH_SIZE, 1]) fc1 = fc_layer(net=forward_net, input=images, size=100, act="sigmoid") fc2 = fc_layer(net=forward_net, input=fc1, size=100, act="sigmoid") predict = fc_layer(net=forward_net, input=fc2, size=10, act="softmax") @@ -215,6 +215,7 @@ def test(cost_name): for data in test_reader(): image_data = numpy.array(map(lambda x: x[0], data)).astype("float32") label_data = numpy.array(map(lambda x: x[1], data)).astype("int32") + label_data = numpy.expand_dims(label_data, axis=1) feed_data(images, image_data) feed_data(labels, label_data) @@ -235,6 +236,7 @@ for pass_id in range(PASS_NUM): for data in train_reader(): image_data = numpy.array(map(lambda x: x[0], data)).astype("float32") label_data = numpy.array(map(lambda x: x[1], data)).astype("int32") + label_data = numpy.expand_dims(label_data, axis=1) feed_data(images, image_data) feed_data(labels, label_data) From 9d692e3bccc712dd6b9410c7e931ab37eb484435 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 19 Sep 2017 21:21:09 +0800 Subject: [PATCH 19/22] add gtest for MKLDNN activation and pass them --- .../activations/ActivationFunction.cpp | 11 ++- .../gserver/activations/MKLDNNActivation.cpp | 47 ++++++----- paddle/gserver/activations/MKLDNNActivation.h | 84 ++++++++++++++----- paddle/gserver/tests/MKLDNNTester.cpp | 40 ++++++--- paddle/gserver/tests/MKLDNNTester.h | 3 +- paddle/gserver/tests/test_MKLDNN.cpp | 46 +++++++++- 6 files changed, 172 insertions(+), 59 deletions(-) diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp index 78e958e06f..8b7b2e9b65 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/gserver/activations/ActivationFunction.cpp @@ -22,9 +22,12 @@ limitations under the License. 
*/ #include #include "paddle/parameter/Argument.h" #include "paddle/utils/ClassRegistrar.h" - #include "paddle/utils/Logging.h" +#ifdef PADDLE_USE_MKLDNN +#include "MKLDNNActivation.h" +#endif + namespace paddle { static ClassRegistrar gActivationRegistrar; @@ -456,6 +459,12 @@ Error __must_check backward(Argument& act) { END_DEFINE_ACTIVATION(log) ActivationFunction* ActivationFunction::create(const std::string& type) { +#ifdef PADDLE_USE_MKLDNN + if (!type.empty() && type.compare(0, 7, "mkldnn_") == 0) { + return MKLDNNActivation::create(type); + } +#endif + return gActivationRegistrar.createByType(type); } diff --git a/paddle/gserver/activations/MKLDNNActivation.cpp b/paddle/gserver/activations/MKLDNNActivation.cpp index 7fa5a4587c..ac50937ef3 100644 --- a/paddle/gserver/activations/MKLDNNActivation.cpp +++ b/paddle/gserver/activations/MKLDNNActivation.cpp @@ -29,24 +29,27 @@ static ClassRegistrar gMKLDNNActivationRegistrar; /** * @def DEFINE_MKLDNN_ELTWISE_ACTIVATION */ -#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA) \ - class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) \ - : public MKLDNNEltwiseActivation { \ - private: \ - static const std::string name; \ - static const float alpha; \ - \ - public: \ - const std::string& getName() const { return name; } \ - float getAlpha() const { return alpha; } \ - }; \ - const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \ - "mkldnn_" #ACT_TYPE; \ - const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \ - static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] { \ - gMKLDNNActivationRegistrar \ - .registerClass( \ - "mkldnn_" #ACT_TYPE); \ +#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA, BWD_ALPHA) \ + class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) \ + : public MKLDNNEltwiseActivation { \ + private: \ + static const std::string name; \ + static const float alpha; \ + static const float bwdAlpha; \ + \ + public: \ + const std::string& getName() const { return name; } \ + float getAlpha() const { return alpha; } \ + float getBwdAlpha() const { return bwdAlpha; } \ + }; \ + const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \ + "mkldnn_" #ACT_TYPE; \ + const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \ + const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::bwdAlpha = BWD_ALPHA; \ + static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] { \ + gMKLDNNActivationRegistrar \ + .registerClass( \ + "mkldnn_" #ACT_TYPE); \ }); /** @@ -54,21 +57,21 @@ static ClassRegistrar gMKLDNNActivationRegistrar; * Actually mkldnn_relu is Leaky Relu. * f(x) = x (x >= 0) * f(x) = negative_slope * x (x < 0) - * @note the negative_slope should be -0.f + * @note the negative_slope should be -0.f in forward */ -DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f) +DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f, 0.f) /** * @brief MKLDNN Tanh Activation. */ -DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f) +DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f, 0.f) /** * @brief MKLDNN ELU(Exponential Linear Unit) Activation. 
* f(x) = x (x >= 0) * f(x) = negative_slope * (exp(x) - 1) (x < 0) */ -DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f) +DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f, 0.f) ActivationFunction* MKLDNNActivation::create(const std::string& type) { return gMKLDNNActivationRegistrar.createByType(type); diff --git a/paddle/gserver/activations/MKLDNNActivation.h b/paddle/gserver/activations/MKLDNNActivation.h index 3afab609be..bda9bbebe5 100644 --- a/paddle/gserver/activations/MKLDNNActivation.h +++ b/paddle/gserver/activations/MKLDNNActivation.h @@ -30,6 +30,9 @@ class MKLDNNActivation : public ActivationFunction { protected: // input value element count size_t cnt_; + // should not merge the resetBwd into resetFwd, + // because the grad data would be changing before backward. + bool needResetBwd_; // mkldnn matrix, primitive, stream and pipeline MKLDNNMatrixPtr val_; MKLDNNMatrixPtr grad_; @@ -40,7 +43,7 @@ protected: std::vector pipelineBwd_; public: - MKLDNNActivation() : cnt_(0) {} + MKLDNNActivation() : cnt_(0), needResetBwd_(true) {} ~MKLDNNActivation() {} static ActivationFunction* create(const std::string& type); static std::vector getAllRegisteredTypes(); @@ -57,19 +60,43 @@ class MKLDNNEltwiseActivation : public MKLDNNActivation { typedef mkldnn::eltwise_forward eltwise_fwd; typedef mkldnn::eltwise_backward eltwise_bwd; +protected: + // save the forward primitive desc, which can be used backward + std::shared_ptr fwdPD_; + // eltwise_bwd need src input value + MKLDNNMatrixPtr inVal_; + // use for copy data + std::shared_ptr copyInVal_; + public: MKLDNNEltwiseActivation() {} ~MKLDNNEltwiseActivation() {} virtual const std::string& getName() const = 0; + + // in common, the alpha of forward and backward should be equal. + // but for relu, to avoid negative value, they should be opposite virtual float getAlpha() const = 0; + virtual float getBwdAlpha() const = 0; virtual float getBeta() const { return 0.f; } + virtual mkldnn::algorithm getAlgo(const std::string& type) const { + if (type == "mkldnn_relu") { + return mkldnn::algorithm::eltwise_relu; + } else if (type == "mkldnn_tanh") { + return mkldnn::algorithm::eltwise_tanh; + } else if (type == "mkldnn_elu") { + return mkldnn::algorithm::eltwise_elu; + } else { + LOG(FATAL) << "Unkown eltwise activation type: " << type; + } + return (mkldnn::algorithm)0; + } /** - * reshape and reset the forward and backward primitives + * reshape and reset the forward primitives */ - void resetPrimitives(Argument& act) { + void resetFwd(Argument& act) { if (cnt_ == act.value->getElementCnt()) { return; } @@ -78,21 +105,13 @@ public: auto eng = CPUEngine::Instance().getEngine(); // get algo setting - mkldnn::algorithm algo; - if (this->getName() == "mkldnn_relu") { - algo = mkldnn::algorithm::eltwise_relu; - } else if (this->getName() == "mkldnn_tanh") { - algo = mkldnn::algorithm::eltwise_tanh; - } else if (this->getName() == "mkldnn_elu") { - algo = mkldnn::algorithm::eltwise_elu; - } else { - LOG(FATAL) << "Unkown eltwise activation type: " << this->getName(); - } + mkldnn::algorithm algo = getAlgo(this->getName()); // note: alpha represents the NegativeSlope when used in relu. 
float alpha = getAlpha(); float beta = getBeta(); /// forward + pipelineFwd_.clear(); val_ = std::dynamic_pointer_cast(act.value); if (val_ == nullptr) { int bs = act.getBatchSize(); @@ -109,33 +128,52 @@ public: val_->getMemoryDesc(), alpha, beta); - auto fwdPD = eltwise_fwd::primitive_desc(fwdDesc, eng); - // inplace buffer, dst = src - fwd_.reset(new eltwise_fwd(fwdPD, *val_, *val_)); - pipelineFwd_.clear(); + fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, eng)); + // use inplace for forward but save input value before submit + inVal_ = val_; + if (act.grad) { + // only copy when need do backward + inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc()); + copyInVal_ = std::make_shared(*val_, *inVal_); + CHECK(copyInVal_) << "should not be emptry"; + pipelineFwd_.push_back(*copyInVal_); + } + fwd_.reset(new eltwise_fwd(*fwdPD_, *val_, *val_)); pipelineFwd_.push_back(*fwd_); + needResetBwd_ = true; + } - /// backward - if (act.grad == nullptr) { - grad_ = nullptr; + /** + * reset the backward primitives, can not merge into resetFwd as the grad data + * would be changing before backward. + */ + void resetBwd(Argument& act) { + if (!needResetBwd_) { return; } + needResetBwd_ = false; + mkldnn::algorithm algo = getAlgo(this->getName()); + float alpha = getBwdAlpha(); + float beta = getBeta(); grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc()); + auto eng = CPUEngine::Instance().getEngine(); auto bwdDesc = eltwise_bwd::desc( algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta); - auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, fwdPD); - bwd_.reset(new eltwise_bwd(bwdPD, *val_, *grad_, *grad_)); + auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, *fwdPD_); + CHECK(inVal_); + bwd_.reset(new eltwise_bwd(bwdPD, *inVal_, *grad_, *grad_)); pipelineBwd_.clear(); pipelineBwd_.push_back(*bwd_); } Error __must_check forward(Argument& act) { - resetPrimitives(act); + resetFwd(act); stream_->submit(pipelineFwd_); return Error(); } Error __must_check backward(Argument& act) { + resetBwd(act); stream_->submit(pipelineBwd_); return Error(); } diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index 2f48e5b2d3..f59618be9d 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -64,15 +64,17 @@ void MKLDNNTester::reset(const TestConfig& dnn, configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i])); } refLayer_ = testLayers_[REF]; - dnnLayer_ = std::dynamic_pointer_cast(testLayers_[DNN]); - CHECK(dnnLayer_); - // for comparison with Paddle reference results, - // need manually add cpu device output for test - dnnLayer_->addOutputArgument(CPU_DEVICE); + dnnLayer_ = testLayers_[DNN]; EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size()); EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); - setInputImgSize(); + + // for comparison with Paddle reference results, + // need manually add cpu device output for test + MKLDNNLayerPtr dnnLayer = std::dynamic_pointer_cast(dnnLayer_); + if (dnnLayer) { + dnnLayer->addOutputArgument(CPU_DEVICE); + } } void MKLDNNTester::setInputImgSize() { @@ -122,7 +124,7 @@ void MKLDNNTester::randomTopDiffs() { void MKLDNNTester::checkForward() { VLOG(MKLDNN_ALL) << "Check Forward"; printTopDatas(); - double delta = compareMatrix(dnnLayer_->getOutput(-1).value, + double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value, refLayer_->getOutputValue()); EXPECT_LE(fabs(delta), eps_); } @@ -155,7 +157,10 @@ void 
MKLDNNTester::checkBackwardWgts() { vector dnnWgts; // used to temply save mkldnn weights saveWgt(parameters_[DNN], dnnWgts); - dnnLayer_->convertWeightsToPaddle(); + MKLDNNLayerPtr dnnLayer = std::dynamic_pointer_cast(dnnLayer_); + if (dnnLayer) { + dnnLayer->convertWeightsToPaddle(); + } for (size_t i = 0; i < parameters_[DNN].size(); ++i) { const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); @@ -322,6 +327,10 @@ void MKLDNNTester::runOnce() { // and clearTopDatas(REF) should be coverd by ref layers clearBotDiffs(REF); clearWgtDiffs(REF); + // it is necessary to clear bottom diffs when only activation is dnn type + if (configs_[DNN].layerConfig.active_type().compare(0, 7, "mkldnn_") == 0) { + clearBotDiffs(DNN); + } } void MKLDNNTester::run(const TestConfig& dnn, @@ -333,8 +342,19 @@ void MKLDNNTester::run(const TestConfig& dnn, float epsilon, bool log, int level) { - VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type() - << " vs " << ref.layerConfig.type(); + CHECK(dnn.layerConfig.type().compare(0, 7, "mkldnn_") == 0 || + dnn.layerConfig.active_type().compare(0, 7, "mkldnn_") == 0) + << "should be MKLDNN layer or MKLDNN activation"; + if (dnn.layerConfig.type() == ref.layerConfig.type()) { + VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " + << dnn.layerConfig.active_type() << " vs " + << ref.layerConfig.active_type(); + } else { + VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " + << dnn.layerConfig.type() << " vs " + << ref.layerConfig.type(); + } + ih_ = inputImgH; iw_ = inputImgW; iter_ = iter; diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h index 5ac885638c..171d176ee7 100644 --- a/paddle/gserver/tests/MKLDNNTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -41,8 +41,7 @@ protected: vector layerMaps_; vector> parameters_; vector testLayers_; - LayerPtr refLayer_; - MKLDNNLayerPtr dnnLayer_; + LayerPtr refLayer_, dnnLayer_; /// run some iterations, all the result should pass size_t iter_; diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index 7620365efa..406181370f 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -17,6 +17,7 @@ limitations under the License. 
*/ #include #include "MKLDNNTester.h" #include "ModelConfig.pb.h" +#include "paddle/gserver/activations/MKLDNNActivation.h" #include "paddle/math/MathUtils.h" using namespace paddle; // NOLINT @@ -190,7 +191,7 @@ void testPoolLayer(const testPoolDesc& pm) { } } -TEST(MkldnnLayer, PoolLayer) { +TEST(MKLDNNLayer, PoolLayer) { /* bs, ch, ih, iw, oh, ow, fh, fw, ph, pw, sh, sw*/ testPoolLayer({2, 1, 4, 4, 2, 2, 3, 3, 0, 0, 2, 2}); testPoolLayer({10, 8, 16, 16, 8, 8, 2, 2, 0, 0, 2, 2}); @@ -202,6 +203,49 @@ TEST(MkldnnLayer, PoolLayer) { testPoolLayer({2, 8, 56, 56, 29, 29, 3, 3, 1, 1, 2, 2}); } +struct testActDesc { + int bs, ch; + int ih, iw; +}; + +static void getAddtoConfig(TestConfig& cfg, const testActDesc& pm) { + cfg.biasSize = 0; + cfg.layerConfig.set_type("addto"); + cfg.layerConfig.set_size(pm.ch * pm.ih * pm.iw); + cfg.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + /* size of input layer= */ size_t(pm.ch * pm.ih * pm.iw), + 0}); + cfg.layerConfig.add_inputs(); +} + +void testActivation(std::string& type, const testActDesc& pm) { + const std::string compareTypes[] = {type, type.erase(0, 7)}; + TestConfig cfg; + getAddtoConfig(cfg, pm); + + TestConfig ref = cfg; + cfg.layerConfig.set_active_type(compareTypes[0]); + ref.layerConfig.set_active_type(compareTypes[1]); + MKLDNNTester tester; + for (auto bs : {pm.bs, 1}) { + tester.run(cfg, ref, bs, pm.ih, pm.iw); + } +} + +TEST(MKLDNNActivation, Activations) { + auto types = MKLDNNActivation::getAllRegisteredTypes(); + // TODO(TJ): mkldnn_softmax not implemented, paddle do not have elu activation + std::set excluded{"mkldnn_softmax", "mkldnn_elu"}; + for (auto type : types) { + if (excluded.count(type)) { + continue; + } + testActivation(type, {16, 64, 32, 32}); + } +} + // TODO(TJ): add branch test int main(int argc, char** argv) { From 41a2321a0e5489f807a71bb935b1ea0f049f324d Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 19 Sep 2017 23:44:20 +0800 Subject: [PATCH 20/22] Refine platform::Transform function and fix prelu_op testing. --- paddle/operators/prelu_op.h | 10 ++-- paddle/platform/transform.h | 84 ++++++++++++++++++++----------- paddle/platform/transform_test.cu | 16 ++++-- 3 files changed, 72 insertions(+), 38 deletions(-) diff --git a/paddle/operators/prelu_op.h b/paddle/operators/prelu_op.h index 63031c25cc..3269116c11 100644 --- a/paddle/operators/prelu_op.h +++ b/paddle/operators/prelu_op.h @@ -54,8 +54,9 @@ class PReluKernel : public framework::OpKernel { int numel = x->numel(); - Transform(context.device_context(), x_ptr, x_ptr + numel, o_ptr, - PReluFunctor(alpha_ptr)); + Transform trans; + trans(context.device_context(), x_ptr, x_ptr + numel, o_ptr, + PReluFunctor(alpha_ptr)); } }; @@ -91,8 +92,9 @@ class PReluGradKernel : public framework::OpKernel { const T* out_ptr = out->data(); int numel = dx->numel(); - Transform(context.device_context(), out_ptr, out_ptr + numel, dout_ptr, - dx_ptr, PReluGradFunctor(alpha_ptr)); + Transform trans; + trans(context.device_context(), out_ptr, out_ptr + numel, dout_ptr, dx_ptr, + PReluGradFunctor(alpha_ptr)); // TODO (Zhuoyuan): add dalpha upgrade when GPU kernels ready } diff --git a/paddle/platform/transform.h b/paddle/platform/transform.h index 8eaab047fd..f196868c72 100644 --- a/paddle/platform/transform.h +++ b/paddle/platform/transform.h @@ -29,45 +29,71 @@ namespace paddle { namespace platform { + // Transform on host or device. It provides the same API in std library. 
-template -void Transform(const DeviceContext& context, InputIter first, InputIter last, - OutputIter result, UnaryOperation op) { - auto place = context.GetPlace(); - if (is_cpu_place(place)) { +template +struct Transform { + template + void operator()(const DeviceContext& context, InputIter first, InputIter last, + OutputIter result, UnaryOperation op); + + template + void operator()(const DeviceContext& context, InputIter1 first1, + InputIter1 last1, InputIter2 first2, OutputIter result, + BinaryOperation op); +}; + +template <> +struct Transform { + template + void operator()(const DeviceContext& context, InputIter first, InputIter last, + OutputIter result, UnaryOperation op) { + auto place = context.GetPlace(); + PADDLE_ENFORCE(is_cpu_place(place), "It must use CPU place."); std::transform(first, last, result, op); - } else { -#ifdef __NVCC__ - auto& ctx = reinterpret_cast(context); - using namespace details; - thrust::transform(thrust::cuda::par.on(ctx.stream()), DevPtrCast(first), - DevPtrCast(last), DevPtrCast(result), op); -#else - PADDLE_THROW("Do not invoke `Transform` in .cc file"); -#endif } -} -template -void Transform(const DeviceContext& context, InputIter1 first1, - InputIter1 last1, InputIter2 first2, OutputIter result, - BinaryOperation op) { - auto place = context.GetPlace(); - if (is_cpu_place(place)) { + template + void operator()(const DeviceContext& context, InputIter1 first1, + InputIter1 last1, InputIter2 first2, OutputIter result, + BinaryOperation op) { + auto place = context.GetPlace(); + PADDLE_ENFORCE(is_cpu_place(place), "It must use CPU place."); std::transform(first1, last1, first2, result, op); - } else { + } +}; + #ifdef __NVCC__ +template <> +struct Transform { + template + void operator()(const DeviceContext& context, InputIter first, InputIter last, + OutputIter result, UnaryOperation op) { + auto place = context.GetPlace(); + PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place."); auto& ctx = reinterpret_cast(context); - using namespace details; - thrust::transform(thrust::cuda::par.on(ctx.stream()), DevPtrCast(first1), - DevPtrCast(last1), DevPtrCast(first2), DevPtrCast(result), + thrust::transform(thrust::cuda::par.on(ctx.stream()), + details::DevPtrCast(first), details::DevPtrCast(last), + details::DevPtrCast(result), op); + } + + template + void operator()(const DeviceContext& context, InputIter1 first1, + InputIter1 last1, InputIter2 first2, OutputIter result, + BinaryOperation op) { + auto place = context.GetPlace(); + PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place."); + auto& ctx = reinterpret_cast(context); + thrust::transform(thrust::cuda::par.on(ctx.stream()), + details::DevPtrCast(first1), details::DevPtrCast(last1), + details::DevPtrCast(first2), details::DevPtrCast(result), op); -#else - PADDLE_THROW("Do not invoke `Transform` in .cc file"); -#endif } }; +#endif } // namespace platform } // namespace paddle diff --git a/paddle/platform/transform_test.cu b/paddle/platform/transform_test.cu index b8a6200bb0..c76cab80e4 100644 --- a/paddle/platform/transform_test.cu +++ b/paddle/platform/transform_test.cu @@ -15,6 +15,7 @@ #include #include "paddle/memory/memcpy.h" #include "paddle/memory/memory.h" +#include "paddle/platform/hostdevice.h" #include "paddle/platform/transform.h" template @@ -38,7 +39,8 @@ TEST(Transform, CPUUnary) { using namespace paddle::platform; CPUDeviceContext ctx; float buf[4] = {0.1, 0.2, 0.3, 0.4}; - Transform(ctx, buf, buf + 4, buf, Scale(10)); + Transform trans; + trans(ctx, buf, buf + 4, buf, 
Scale(10)); for (int i = 0; i < 4; ++i) { ASSERT_NEAR(buf[i], static_cast(i + 1), 1e-5); } @@ -52,7 +54,8 @@ TEST(Transform, GPUUnary) { float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4}; float* gpu_buf = static_cast(Alloc(gpu0, sizeof(float) * 4)); Copy(gpu0, gpu_buf, CPUPlace(), cpu_buf, sizeof(cpu_buf)); - Transform(ctx, gpu_buf, gpu_buf + 4, gpu_buf, Scale(10)); + Transform trans; + trans(ctx, gpu_buf, gpu_buf + 4, gpu_buf, Scale(10)); ctx.Wait(); Copy(CPUPlace(), cpu_buf, gpu0, gpu_buf, sizeof(cpu_buf)); Free(gpu0, gpu_buf); @@ -65,7 +68,9 @@ TEST(Transform, CPUBinary) { using namespace paddle::platform; using namespace paddle::memory; int buf[4] = {1, 2, 3, 4}; - Transform(CPUDeviceContext(), buf, buf + 4, buf, buf, Multiply()); + Transform trans; + CPUDeviceContext ctx; + trans(ctx, buf, buf + 4, buf, buf, Multiply()); for (int i = 0; i < 4; ++i) { ASSERT_EQ((i + 1) * (i + 1), buf[i]); } @@ -79,11 +84,12 @@ TEST(Transform, GPUBinary) { CUDADeviceContext ctx(gpu0); int* gpu_buf = static_cast(Alloc(gpu0, sizeof(buf))); Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf)); - Transform(ctx, gpu_buf, gpu_buf + 4, gpu_buf, gpu_buf, Multiply()); + Transform trans; + trans(ctx, gpu_buf, gpu_buf + 4, gpu_buf, gpu_buf, Multiply()); ctx.Wait(); Copy(CPUPlace(), buf, gpu0, gpu_buf, sizeof(buf)); Free(gpu0, gpu_buf); for (int i = 0; i < 4; ++i) { ASSERT_EQ((i + 1) * (i + 1), buf[i]); } -} \ No newline at end of file +} From 2aa4d326ecad57f0fba458581300464dcb7bb45a Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 19 Sep 2017 23:45:39 +0800 Subject: [PATCH 21/22] Fix unit testing in test_prelu_op. --- python/paddle/v2/framework/tests/test_prelu_op.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_prelu_op.py b/python/paddle/v2/framework/tests/test_prelu_op.py index 76d1f1d5a4..2b6b7db368 100644 --- a/python/paddle/v2/framework/tests/test_prelu_op.py +++ b/python/paddle/v2/framework/tests/test_prelu_op.py @@ -17,10 +17,10 @@ class PReluTest(OpTest): assert out_np is not self.inputs['X'] self.outputs = {'Out': out_np} - def not_test_check_output(self): + def test_check_output(self): self.check_output() - def not_test_check_grad(self): + def test_check_grad(self): self.check_grad(['X'], 'Out') From 26e9ab476b4c0b3543b5db2ea318cd9b79373723 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Wed, 20 Sep 2017 10:49:36 +0800 Subject: [PATCH 22/22] Fix if-statement error in CMakeLists.txt and remove build_ios task from travis.
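Once the top-level condition became if(ANDROID OR IOS), the Android API-level checks were also evaluated for iOS builds; this change wraps them in an inner if(ANDROID) block and drops the build_ios job from .travis.yml. A rough sketch of the intended control flow after the change (only this fragment of CMakeLists.txt is shown; the surrounding option settings are elided):

    if(ANDROID OR IOS)
      if(ANDROID)
        if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "16")
          message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 16")
        elseif(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21")
          # TODO: support glog for Android api 16 ~ 19 in the future
          message(WARNING "Using the unofficial git repository instead")
        endif()
      endif()
      # common cross-compiling settings (WITH_GPU and related options) follow here
    endif()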
--- .travis.yml | 7 ------- CMakeLists.txt | 12 +++++++----- paddle/scripts/travis/build_ios.sh | 2 +- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0a8edb42b4..d0e2696f10 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ cache: - $HOME/.ccache - $HOME/.cache/pip - $TRAVIS_BUILD_DIR/build/third_party - - $TRAVIS_BUILD_DIR/build_ios/third_party sudo: required dist: trusty os: @@ -12,12 +11,6 @@ os: env: - JOB=build_doc - JOB=check_style -matrix: - include: - - env: JOB=build_ios - os: osx - osx_image: xcode8.3 - compiler: clang addons: apt: packages: diff --git a/CMakeLists.txt b/CMakeLists.txt index e64e666985..4b564b4826 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,11 +65,13 @@ if(NOT CMAKE_BUILD_TYPE) endif() if(ANDROID OR IOS) - if(ANDROID AND ${CMAKE_SYSTEM_VERSION} VERSION_LESS "16") - message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 16") - elseif(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21") - # TODO: support glog for Android api 16 ~ 19 in the future - message(WARNING "Using the unofficial git repository instead") + if(ANDROID) + if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "16") + message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 16") + elseif(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21") + # TODO: support glog for Android api 16 ~ 19 in the future + message(WARNING "Using the unofficial git repository instead") + endif() endif() set(WITH_GPU OFF CACHE STRING diff --git a/paddle/scripts/travis/build_ios.sh b/paddle/scripts/travis/build_ios.sh index 3568b5ebcb..dee7cf7cbb 100755 --- a/paddle/scripts/travis/build_ios.sh +++ b/paddle/scripts/travis/build_ios.sh @@ -17,4 +17,4 @@ cmake -DCMAKE_SYSTEM_NAME=iOS \ -DCMAKE_BUILD_TYPE=Release \ .. -make +make -j 2