!11276 【MS】【LITE】【GPU】 Reduce the OpenCL shared library (.so) size by 0.5M

From: @wangdongxu6
Reviewed-by: @ddwsky
Signed-off-by: @ddwsky
pull/11276/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 8e9086bbe6

File diff suppressed because it is too large Load Diff

@ -1,23 +1,20 @@
add_compile_definitions(USE_ANDROID_LOG)
if (ENABLE_V0)
add_definitions(-DENABLE_V0)
if(ENABLE_V0)
add_definitions(-DENABLE_V0)
endif()
set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
include_directories(${LITE_DIR}/nnacl/)
include_directories(${LITE_DIR}/nnacl/optimize)
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
#for performance
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
if (SUPPORT_GPU)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti")
else ()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions")
endif ()
endif ()
endif ()
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections \
-fdata-sections -ffast-math -fno-rtti -fno-exceptions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections \
-fdata-sections -ffast-math -fno-rtti -fno-exceptions")
endif()
endif()
set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/common/file_utils.cc
@ -42,7 +39,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/dequant.cc
)
if (SUPPORT_GPU)
if(SUPPORT_GPU)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/opencl_kernel.cc
@ -54,10 +51,10 @@ if (SUPPORT_GPU)
${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_runtime.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_wrapper.cc
)
endif ()
endif()
if (SUPPORT_TRAIN)
if(SUPPORT_TRAIN)
set(ANF_SRC
${ANF_SRC}
)
@ -70,7 +67,7 @@ if (SUPPORT_TRAIN)
${CMAKE_CURRENT_SOURCE_DIR}/train/train_model.cc
${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc
)
endif ()
endif()
add_subdirectory(ops)
add_subdirectory(runtime/kernel/arm)
@ -85,53 +82,54 @@ set_target_properties(mindspore-lite_static PROPERTIES OUTPUT_NAME "mindspore-li
set_target_properties(mindspore-lite_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
if (SUPPORT_GPU)
if(SUPPORT_GPU)
add_subdirectory(runtime/kernel/opencl)
target_link_libraries(mindspore-lite cpu_kernel_mid opencl_kernel_mid nnacl cpu_ops_mid)
target_link_libraries(mindspore-lite_static cpu_kernel_mid opencl_kernel_mid nnacl_mid cpu_ops_mid)
else ()
else()
target_link_libraries(mindspore-lite cpu_kernel_mid nnacl cpu_ops_mid)
target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl_mid cpu_ops_mid)
endif ()
if (SUPPORT_NPU)
endif()
if(SUPPORT_NPU)
add_subdirectory(runtime/agent/npu)
include_directories(${DDK_PATH})
target_link_libraries(mindspore-lite npu_kernel_mid)
target_link_libraries(mindspore-lite_static npu_kernel_mid)
endif ()
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
endif()
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(mindspore-lite log)
target_link_libraries(mindspore-lite_static log)
endif ()
if (BUILD_MINDDATA STREQUAL "lite")
endif()
if(BUILD_MINDDATA STREQUAL "lite")
target_link_libraries(mindspore-lite minddata_eager_mid minddata-lite)
target_link_libraries(mindspore-lite_static minddata_eager_mid)
endif ()
endif()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND PLATFORM_ARM)
add_custom_command(TARGET mindspore-lite POST_BUILD
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
if(PLATFORM_ARM)
set(NDK_STRIP
"${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip")
endif()
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND PLATFORM_ARM)
add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND ${NDK_STRIP}
${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
endif ()
endif()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
if (PLATFORM_ARM)
add_custom_command(TARGET mindspore-lite POST_BUILD
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
if(PLATFORM_ARM)
add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND ${NDK_STRIP}
${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
elseif (NOT WIN32)
add_custom_command(TARGET mindspore-lite POST_BUILD
COMMAND strip ${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
endif ()
endif ()
elseif(NOT WIN32)
add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND strip ${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
endif()
endif()
########################## build optimize and float16 library #################################3
if (PLATFORM_ARM64)
########################## build optimize and float16 library #################################
if(PLATFORM_ARM64)
target_link_libraries(mindspore-lite cpu_opt_kernel_mid nnacl_optimize_mid)
target_link_libraries(mindspore-lite_static cpu_opt_kernel_mid nnacl_optimize_mid)
if (ENABLE_FP16)
if(ENABLE_FP16)
target_link_libraries(mindspore-lite cpu_fp16_kernel_mid nnacl_fp16_mid)
target_link_libraries(mindspore-lite_static cpu_fp16_kernel_mid nnacl_fp16_mid)
endif ()
endif ()
endif()
endif()

@ -94,11 +94,14 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector<size_t> &img
MS_ASSERT(buffer);
MS_ASSERT(image);
MS_ASSERT(img_size.size() == 3);
cl::ImageFormat image_format(CL_RGBA, img_size[2]);
if (data == nullptr) {
*image = new (std::nothrow)
cl::Image2D(*ocl_runtime_->Context(), image_format, **buffer, img_size[0], img_size[1], 0, &ret);
// copy from cl2.hpp
cl_image_desc desc = {CL_MEM_OBJECT_IMAGE2D, img_size[0], img_size[1], 0, 0, 0, 0, 0, 0, (**buffer).get()};
const cl::Context &context = *ocl_runtime_->Context();
cl_image_format image_format{CL_RGBA, static_cast<uint32_t>(img_size[2])};
*image = new (std::nothrow) cl::Image2D(clCreateImage(context.get(), 0, &image_format, &desc, nullptr, &ret));
} else {
cl::ImageFormat image_format(CL_RGBA, img_size[2]);
*image = new (std::nothrow) cl::Image2D(*ocl_runtime_->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
image_format, img_size[0], img_size[1], 0, data, &ret);
}

@ -562,7 +562,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
if (command_queue == nullptr) {
command_queue = default_command_queue_;
}
return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
return clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr);
}
void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
@ -591,7 +591,7 @@ int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue)
if (command_queue == nullptr) {
command_queue = default_command_queue_;
}
return command_queue->enqueueUnmapSVM(host_ptr);
return clEnqueueSVMUnmap(command_queue->get(), host_ptr, 0, nullptr, nullptr);
}
bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {

@ -83,7 +83,7 @@ class OpenCLRuntime {
auto svm_capabilities = GetSVMCapabilities();
if (svm_capabilities) {
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value;
return kernel.setArg(index, value);
return clSetKernelArgSVMPointer(kernel.get(), index, value);
}
cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;

@ -142,13 +142,13 @@ bool LoadLibraryFromPath(const std::string &library_path, void **handle_ptr) {
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyImage);
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyBufferToImage);
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyImageToBuffer);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
#if CL_TARGET_OPENCL_VERSION >= 120
LOAD_OPENCL_FUNCTION_PTR(clRetainDevice);
LOAD_OPENCL_FUNCTION_PTR(clReleaseDevice);
LOAD_OPENCL_FUNCTION_PTR(clCreateImage);
LOAD_OPENCL_FUNCTION_PTR(clEnqueueFillImage);
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if CL_TARGET_OPENCL_VERSION >= 200
LOAD_OPENCL_FUNCTION_PTR(clCreateCommandQueueWithProperties);
LOAD_OPENCL_FUNCTION_PTR(clGetExtensionFunctionAddress);
LOAD_OPENCL_FUNCTION_PTR(clSVMAlloc);
@ -232,13 +232,13 @@ CL_DEFINE_FUNC_PTR(clGetEventProfilingInfo);
CL_DEFINE_FUNC_PTR(clGetImageInfo);
CL_DEFINE_FUNC_PTR(clEnqueueCopyBufferToImage);
CL_DEFINE_FUNC_PTR(clEnqueueCopyImageToBuffer);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
#if CL_TARGET_OPENCL_VERSION >= 120
CL_DEFINE_FUNC_PTR(clRetainDevice);
CL_DEFINE_FUNC_PTR(clReleaseDevice);
CL_DEFINE_FUNC_PTR(clCreateImage);
CL_DEFINE_FUNC_PTR(clEnqueueFillImage);
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if CL_TARGET_OPENCL_VERSION >= 200
CL_DEFINE_FUNC_PTR(clGetKernelSubGroupInfoKHR);
CL_DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties);
CL_DEFINE_FUNC_PTR(clGetExtensionFunctionAddress);
@ -651,7 +651,7 @@ cl_int clEnqueueCopyImageToBuffer(cl_command_queue command_queue, cl_mem src_ima
event_wait_list, event);
}
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
#if CL_TARGET_OPENCL_VERSION >= 120
// clRetainDevice wrapper, use OpenCLWrapper function.
cl_int clRetainDevice(cl_device_id device) {
@ -685,7 +685,7 @@ cl_int clEnqueueFillImage(cl_command_queue command_queue, cl_mem image, const vo
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if CL_TARGET_OPENCL_VERSION >= 200
// clCreateCommandQueueWithProperties wrapper, use OpenCLWrapper function.
cl_command_queue clCreateCommandQueueWithProperties(cl_context context, cl_device_id device,

@ -110,7 +110,7 @@ using clEnqueueCopyBufferToImageFunc = cl_int(CL_API_CALL *)(cl_command_queue, c
using clEnqueueCopyImageToBufferFunc = cl_int(CL_API_CALL *)(cl_command_queue, cl_mem, cl_mem, const size_t *,
const size_t *, size_t, cl_uint, const cl_event *,
cl_event *);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
#if CL_TARGET_OPENCL_VERSION >= 120
using clRetainDeviceFunc = cl_int (*)(cl_device_id);
using clReleaseDeviceFunc = cl_int (*)(cl_device_id);
using clCreateImageFunc = cl_mem (*)(cl_context, cl_mem_flags, const cl_image_format *, const cl_image_desc *, void *,
@ -118,7 +118,7 @@ using clCreateImageFunc = cl_mem (*)(cl_context, cl_mem_flags, const cl_image_fo
using clEnqueueFillImageFunc = cl_int (*)(cl_command_queue, cl_mem, const void *, const size_t *, const size_t *,
cl_uint, const cl_event *, cl_event *);
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if CL_TARGET_OPENCL_VERSION >= 200
using clCreateProgramWithILFunc = cl_program (*)(cl_context, const void *, size_t, cl_int *);
using clSVMAllocFunc = void *(*)(cl_context, cl_mem_flags, size_t size, cl_uint);
using clSVMFreeFunc = void (*)(cl_context, void *);
@ -185,13 +185,13 @@ CL_DECLARE_FUNC_PTR(clGetEventProfilingInfo);
CL_DECLARE_FUNC_PTR(clGetImageInfo);
CL_DECLARE_FUNC_PTR(clEnqueueCopyBufferToImage);
CL_DECLARE_FUNC_PTR(clEnqueueCopyImageToBuffer);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
#if CL_TARGET_OPENCL_VERSION >= 120
CL_DECLARE_FUNC_PTR(clRetainDevice);
CL_DECLARE_FUNC_PTR(clReleaseDevice);
CL_DECLARE_FUNC_PTR(clCreateImage);
CL_DECLARE_FUNC_PTR(clEnqueueFillImage);
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if CL_TARGET_OPENCL_VERSION >= 200
CL_DECLARE_FUNC_PTR(clGetKernelSubGroupInfoKHR);
CL_DECLARE_FUNC_PTR(clCreateCommandQueueWithProperties);
CL_DECLARE_FUNC_PTR(clGetExtensionFunctionAddress);

Loading…
Cancel
Save