diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake index ecdd2b1668..0a06c0d4a3 100644 --- a/cmake/package_lite.cmake +++ b/cmake/package_lite.cmake @@ -136,6 +136,8 @@ if(PLATFORM_ARM64) COMPONENT ${RUNTIME_COMPONENT_NAME}) install(DIRECTORY ${TOP_DIR}/include/api/ DESTINATION ${RUNTIME_INC_DIR}/api COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "ascend* ops*" EXCLUDE) + install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME} + COMPONENT ${CODEGEN_COMPONENT_NAME}) if(ENABLE_TOOLS) install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME}) endif() @@ -157,6 +159,8 @@ elseif(PLATFORM_ARM32) COMPONENT ${RUNTIME_COMPONENT_NAME}) install(DIRECTORY ${TOP_DIR}/include/api/ DESTINATION ${RUNTIME_INC_DIR}/api COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "ascend*" EXCLUDE) + install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME} + COMPONENT ${CODEGEN_COMPONENT_NAME}) if(ENABLE_TOOLS) install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME}) endif() @@ -231,6 +235,8 @@ else() install(FILES ${glog_LIBPATH}/libglog.so.0.4.0 DESTINATION ${CONVERTER_PKG_NAME}/third_party/glog/lib RENAME libglog.so.0 COMPONENT ${CONVERTER_COMPONENT_NAME}) + install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME} + COMPONENT ${CODEGEN_COMPONENT_NAME}) install(TARGETS codegen RUNTIME DESTINATION ${CODEGEN_PKG_NAME}/ COMPONENT ${CODEGEN_COMPONENT_NAME}) endif() @@ -249,7 +255,7 @@ else() endif() set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) if(PLATFORM_ARM64 OR PLATFORM_ARM32) - set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME}) + set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME} ${CODEGEN_COMPONENT_NAME}) else() set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME} ${CONVERTER_COMPONENT_NAME} 
${CODEGEN_COMPONENT_NAME}) endif() diff --git a/mindspore/core/utils/log_adapter.h b/mindspore/core/utils/log_adapter.h index 63329fda0f..a70764073e 100644 --- a/mindspore/core/utils/log_adapter.h +++ b/mindspore/core/utils/log_adapter.h @@ -34,7 +34,7 @@ #define LOG_HDR_FILE_REL_PATH "mindspore/core/utils/log_adapter.h" // Get start index of file relative path in __FILE__ -static constexpr int GetRelPathPos() noexcept { +static constexpr size_t GetRelPathPos() noexcept { return sizeof(__FILE__) > sizeof(LOG_HDR_FILE_REL_PATH) ? sizeof(__FILE__) - sizeof(LOG_HDR_FILE_REL_PATH) : 0; } diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index 65128f7fd3..8180cdd2f3 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -89,8 +89,10 @@ if(SUPPORT_TRAIN) else() if(PLATFORM_ARM64) set(RUNTIME_COMPONENT_NAME inference-android-aarch64) + set(CODEGEN_COMPONENT_NAME codegen-android-aarch64) elseif(PLATFORM_ARM32) set(RUNTIME_COMPONENT_NAME inference-android-aarch32) + set(CODEGEN_COMPONENT_NAME codegen-android-aarch32) elseif(WIN32) if("${X86_64_SIMD}" STREQUAL "off") set(RUNTIME_COMPONENT_NAME inference-win-x64) @@ -218,7 +220,6 @@ if(ENABLE_CONVERTER) include(${TOP_DIR}/cmake/external_libs/eigen.cmake) include(${TOP_DIR}/cmake/external_libs/protobuf.cmake) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder) endif() if(ENABLE_MINDRT) @@ -272,6 +273,7 @@ endif() add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder) if(ENABLE_TOOLS) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark) if(SUPPORT_TRAIN) diff --git a/mindspore/lite/micro/cmake/file_list.cmake b/mindspore/lite/micro/cmake/file_list.cmake index e72274f2a5..70d2818dee 100644 --- a/mindspore/lite/micro/cmake/file_list.cmake +++ b/mindspore/lite/micro/cmake/file_list.cmake @@ -301,6 
+301,30 @@ set(LITE_KERNEL_SRC ${LITE_DIR}/nnacl/infer/splice_infer.c ) +#### sse +if("${X86_64_SIMD}" STREQUAL "sse") + set(SSE_SRC + ${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c + ${LITE_DIR}/nnacl/intrinsics/sse/PackNHWCToNCHWFp32.c + ${LITE_DIR}/nnacl/intrinsics/sse/MatMul_Sse.c + ) + set_property(SOURCE ${SSE_SRC} PROPERTY LANGUAGE C) +endif() + +#### avx +if("${X86_64_SIMD}" STREQUAL "avx") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -mavx -mavx2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2") + set(AVX_SRC + ${LITE_DIR}/nnacl/intrinsics/avx/common_utils.c + ${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c + ${LITE_DIR}/nnacl/intrinsics/sse/MatMul_Sse.c + ${LITE_DIR}/nnacl/intrinsics/sse/PackNHWCToNCHWFp32.c + ${LITE_DIR}/nnacl/assembly/avx/MatmulAvx.S + ) + set_property(SOURCE ${AVX_SRC} PROPERTY LANGUAGE C) +endif() + list(APPEND FILE_SET ${CODER_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC} - ${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC} ${MINDSPORE_CORE}) + ${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC} ${MINDSPORE_CORE} ${SSE_SRC} ${AVX_SRC}) diff --git a/mindspore/lite/micro/coder/CMakeLists.txt b/mindspore/lite/micro/coder/CMakeLists.txt index 97a05fe2e1..70ef0b23b6 100644 --- a/mindspore/lite/micro/coder/CMakeLists.txt +++ b/mindspore/lite/micro/coder/CMakeLists.txt @@ -25,10 +25,12 @@ include(${MICRO_DIR}/cmake/file_list.cmake) include(${MICRO_DIR}/cmake/package_wrapper.cmake) add_subdirectory(operator_library) -add_executable(codegen main.cc ${FILE_SET}) -add_dependencies(codegen fbs_src) -add_dependencies(codegen fbs_inner_src) -target_link_libraries(codegen PRIVATE ${SECUREC_LIBRARY} mindspore::glog) -if(NOT WIN32 AND "${CMAKE_BUILD_TYPE}" STREQUAL "Release") - add_custom_command(TARGET codegen POST_BUILD COMMAND strip ${CODEGEN_PATH}) +if(NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64) + add_executable(codegen main.cc ${FILE_SET}) + add_dependencies(codegen fbs_src) + add_dependencies(codegen fbs_inner_src) + 
target_link_libraries(codegen PRIVATE ${SECUREC_LIBRARY} mindspore::glog) + if(NOT WIN32 AND "${CMAKE_BUILD_TYPE}" STREQUAL "Release") + add_custom_command(TARGET codegen POST_BUILD COMMAND strip ${CODEGEN_PATH}) + endif() endif() diff --git a/mindspore/lite/micro/coder/allocator/allocator.h b/mindspore/lite/micro/coder/allocator/allocator.h index 660d48e121..0ca0c859c6 100644 --- a/mindspore/lite/micro/coder/allocator/allocator.h +++ b/mindspore/lite/micro/coder/allocator/allocator.h @@ -92,19 +92,17 @@ class MemoryAllocator { * including tensor, workspace */ template - std::string GetRuntimeAddr(T t, bool is_const = false) { + std::string GetRuntimeAddr(T t, bool immutable = false) { if (!t) { return ""; } - std::string type_info = is_const ? "const " : ""; std::string type_name; if (std::type_index(typeid(T)) == std::type_index(typeid(Tensor *))) { type_name = GetTensorDataType(reinterpret_cast(t)->data_type()) + "*"; } else { type_name = GetVariableTypeName(); } - type_info = wrap(type_info + type_name); - + std::string type_info = wrap(type_name); void *variable = reinterpret_cast(t); auto item = inputs_addr_.find(variable); if (item != inputs_addr_.end()) { @@ -133,6 +131,9 @@ class MemoryAllocator { [&variable](const std::pair &a) { return variable == a.first; }); if (iter != origin_weights_addr_.end()) { saved_weights_addr_.insert(std::make_pair(iter->second, reinterpret_cast(variable))); + if (immutable) { + malloc_weights_addr_.insert({reinterpret_cast(variable), iter->second}); + } return iter->second; } MS_LOG(ERROR) << "uninitialized memory"; diff --git a/mindspore/lite/micro/coder/generator/component/benchmark_component.cc b/mindspore/lite/micro/coder/generator/component/benchmark_component.cc index 1b24efc4ed..3c6e482862 100644 --- a/mindspore/lite/micro/coder/generator/component/benchmark_component.cc +++ b/mindspore/lite/micro/coder/generator/component/benchmark_component.cc @@ -134,7 +134,7 @@ void CodeBenchmarkInference(std::ofstream &ofs, const 
std::string &module_name) << " uint64_t timeAvg = 0;\n" << " int loop_count = atoi(argv[3]);\n" << " printf(\"======Inference Start======\\n\");\n" - << " printf(\"cycles: %d\", loop_count);\n" + << " printf(\"cycles: %d\\n\", loop_count);\n" << " for (int i = 0; i < loop_count; i++) {\n" << " uint64_t runBegin = GetTimeUs();\n" << " " << module_name << "_Inference();\n" diff --git a/mindspore/lite/micro/coder/generator/component/cmake_component.cc b/mindspore/lite/micro/coder/generator/component/cmake_component.cc index 9096f3872e..a115fe425b 100644 --- a/mindspore/lite/micro/coder/generator/component/cmake_component.cc +++ b/mindspore/lite/micro/coder/generator/component/cmake_component.cc @@ -48,7 +48,7 @@ void CodeCMakeNetLibrary(std::ofstream &ofs, const std::string &module_name, con } ofs << "file(GLOB NET_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.c)\n" - << "add_library(${PROJ_NAME} STATIC ${NET_SRC})\n"; + << "add_library(net STATIC ${NET_SRC})\n"; } } // namespace mindspore::lite::micro diff --git a/mindspore/lite/micro/coder/generator/component/const_blocks/cmake_lists.h b/mindspore/lite/micro/coder/generator/component/const_blocks/cmake_lists.h index 3e0166c6f1..d9a07fffdc 100644 --- a/mindspore/lite/micro/coder/generator/component/const_blocks/cmake_lists.h +++ b/mindspore/lite/micro/coder/generator/component/const_blocks/cmake_lists.h @@ -19,9 +19,8 @@ const char *bench_cmake_lists_txt = "cmake_minimum_required(VERSION 3.14)\n" - "project(${PROJ_NAME})\n" + "project(benchmark)\n" "\n" - "message(\"project name: ${PROJ_NAME}\")\n" "message(\"project name: ${MODEL_LIB_PATH}\")\n" "message(\"architecture cmake file path: ${ARCH_CMAKE_PATH}\")\n" "\n" @@ -54,14 +53,13 @@ const char *bench_cmake_lists_txt = "endif ()\n" "link_directories(${MODEL_LIB_PATH})\n" "include(benchmark.cmake)\n" - "add_executable(${PROJ_NAME}_bench ${SRC_FILES})\n" - "target_link_libraries(${PROJ_NAME}_bench ${MODEL_LIB_NAME} -lm -pthread)\n"; + "add_executable(benchmark ${SRC_FILES})\n" + 
"target_link_libraries(benchmark ${MODEL_LIB_NAME} -lm -pthread)\n"; const char *src_cmake_lists_txt = "cmake_minimum_required(VERSION 3.14)\n" - "project(${PROJ_NAME})\n" + "project(net)\n" "\n" - "message(\"project name: ${PROJ_NAME}\")\n" "message(\"architecture cmake file path: ${ARCH_CMAKE_PATH}\")\n" "message(\"operator lib path: ${OP_LIB}\")\n" "message(\"operator header path: ${OP_HEADER_PATH}\")\n" @@ -83,10 +81,11 @@ const char *src_cmake_lists_txt = "else()\n" " set(CMAKE_C_FLAGS \"-fPIC -fPIE -O3 -Werror -fstack-protector-strong -fomit-frame-pointer ${CMAKE_C_FLAGS}\")\n" " set(CMAKE_C_FLAGS_Release \"${CMAKE_C_FLAGS_Release} -O3 -ffunction-sections -Werror -fdata-sections\")\n" + " string(REPLACE \"-g\" \"\" CMAKE_C_FLAGS \"${CMAKE_C_FLAGS}\")\n" "endif()\n" "\n" "function(create_library)\n" - " add_custom_command(TARGET ${PROJ_NAME}\n" + " add_custom_command(TARGET net\n" " POST_BUILD\n" " COMMAND rm -rf tmp\n" " COMMAND mkdir tmp\n" @@ -97,9 +96,9 @@ const char *src_cmake_lists_txt = " COMMENT \"unzip raw static library ${library_name}\"\n" " )\n" " foreach (object_file ${OP_SRC})\n" - " add_custom_command(TARGET ${PROJ_NAME} POST_BUILD COMMAND mv ./tmp/${object_file} .)\n" + " add_custom_command(TARGET net POST_BUILD COMMAND mv ./tmp/${object_file} .)\n" " endforeach ()\n" - " add_custom_command(TARGET ${PROJ_NAME}\n" + " add_custom_command(TARGET net\n" " POST_BUILD\n" " COMMAND ar cr ${library_name} *.o\n" " COMMAND ranlib ${library_name}\n" @@ -109,7 +108,7 @@ const char *src_cmake_lists_txt = " COMMENT \"generate specified static library ${library_name}\"\n" " )\n" "endfunction(create_library)\n" - "string(CONCAT library_name \"lib\" ${PROJ_NAME} \".a\")\n" + "string(CONCAT library_name \"lib\" net \".a\")\n" "create_library()\n"; #endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_ diff --git a/mindspore/lite/micro/coder/generator/component/parallel_component.cc 
b/mindspore/lite/micro/coder/generator/component/parallel_component.cc index a4083498ab..f01322f61a 100644 --- a/mindspore/lite/micro/coder/generator/component/parallel_component.cc +++ b/mindspore/lite/micro/coder/generator/component/parallel_component.cc @@ -36,7 +36,7 @@ void CodeCreateThreadPool(std::ofstream &ofs, const std::string &module_name) { " MICRO_ERROR(\"set global thread pool failed\");\n" " return RET_ERROR;\n" " }\n" - " MICRO_INFO(\"config: ThreadNum: %d, BindMode: %d\", thread_num, bind_mode);\n"; + " printf(\"config: ThreadNum: %d, BindMode: %d\\n\", thread_num, bind_mode);\n"; } void CodeDestroyThreadPool(std::ofstream &ofs) { ofs << " DestroyThreadPool(thread_pool);\n"; } diff --git a/mindspore/lite/micro/coder/generator/component/weight_component.cc b/mindspore/lite/micro/coder/generator/component/weight_component.cc index 28d2929c5b..3a8406a93c 100644 --- a/mindspore/lite/micro/coder/generator/component/weight_component.cc +++ b/mindspore/lite/micro/coder/generator/component/weight_component.cc @@ -17,9 +17,9 @@ #include "coder/generator/component/weight_component.h" #include #include -#include #include "coder/generator/component/const_blocks/license.h" #include "coder/utils/coder_utils.h" +#include "coder/opcoders/parallel.h" namespace mindspore::lite::micro { void CodeWeightFileHeader(std::ofstream &ofs, const std::unique_ptr &ctx) { @@ -89,7 +89,7 @@ void CodeWeightInitFunc(std::ofstream &ofs, const std::string &module_name, cons << " if (weight_buffer == NULL) {\n" << " return RET_ERROR;\n" << " }\n"; - + ofs << " int " << gThreadNum << " = 1;\n\n"; ofs << " struct ModelParameter {\n" << " void *addr;\n" << " size_t size;\n" diff --git a/mindspore/lite/micro/coder/opcoders/base/detection_post_process_base_coder.cc b/mindspore/lite/micro/coder/opcoders/base/detection_post_process_base_coder.cc index cd0684d2d9..c6646d4b64 100644 --- a/mindspore/lite/micro/coder/opcoders/base/detection_post_process_base_coder.cc +++ 
b/mindspore/lite/micro/coder/opcoders/base/detection_post_process_base_coder.cc @@ -82,9 +82,9 @@ int DetectionPostProcessBaseCoder::AllocateBuffer() { MS_CHECK_PTR(params_->decoded_boxes_); params_->nms_candidate_ = allocator_->Malloc(kNumberTypeUInt8, num_boxes_ * sizeof(uint8_t), kWorkspace); MS_CHECK_PTR(params_->nms_candidate_); - params_->selected_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace); + params_->selected_ = allocator_->Malloc(kNumberTypeInt32, num_boxes_ * sizeof(int), kWorkspace); MS_CHECK_PTR(params_->selected_); - params_->single_class_indexes_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace); + params_->single_class_indexes_ = allocator_->Malloc(kNumberTypeInt32, num_boxes_ * sizeof(int), kWorkspace); MS_CHECK_PTR(params_->single_class_indexes_); if (params_->use_regular_nms_) { @@ -92,13 +92,13 @@ int DetectionPostProcessBaseCoder::AllocateBuffer() { allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace); MS_CHECK_PTR(params_->scores_); params_->indexes_ = - allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace); + allocator_->Malloc(kNumberTypeInt32, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace); MS_CHECK_PTR(params_->indexes_); params_->all_class_scores_ = allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace); MS_CHECK_PTR(params_->all_class_scores_); params_->all_class_indexes_ = - allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace); + allocator_->Malloc(kNumberTypeInt32, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace); MS_CHECK_PTR(params_->all_class_indexes_); } else { params_->scores_ = allocator_->Malloc(kNumberTypeFloat, num_boxes_ * sizeof(float), kWorkspace); diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc 
b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc index 5ff71219e6..5914d31e4a 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc @@ -36,7 +36,7 @@ int BiasAddFP32Coder::DoCode(CoderContext *ctx) { return RET_ERROR; } size_t data_size = input_tensor_->ElementsNum(); - std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex)); + std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex), true); Collect(ctx, {"nnacl/arithmetic.h", "nnacl/nnacl_utils.h", "nnacl/nnacl_common.h", "nnacl/base/arithmetic_base.h", "nnacl/fp32/add_fp32.h", "nnacl/fp32/arithmetic_fp32.h"}, diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc index 0c75fff1cb..6f37ec18fe 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc @@ -183,13 +183,15 @@ int Conv2DINT8Coder::Resize() { int Conv2DINT8Coder::DoCode(CoderContext *const context) { std::vector asm_files; if (target_ == kARM32A) { - asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8Neon32.S"}; + asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8.S"}; } else if (target_ == kARM64) { - asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8Neon64.S"}; + asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8.S", "MatmulDpInt8.S"}; } - Collect(context, {"nnacl/int8/conv_int8.h", "nnacl/common_func.h", "wrapper/int8/convolution_int8_wrapper.h"}, + Collect(context, + {"nnacl/int8/conv_int8.h", "nnacl/common_func.h", "wrapper/int8/convolution_int8_wrapper.h", + "wrapper/base/common_wrapper.h", "wrapper/base/optimize_handler_wrapper.h"}, {"common_func.c", "pack_int8.c", "conv_int8.c", 
"winograd_transform.c", "matmul_int8.c", "fixed_point.c", - "convolution_int8_wrapper.c", "conv_init_int8_wrapper.c", "thread_pool.c"}, + "convolution_int8_wrapper.c", "conv_init_int8_wrapper.c", "common_wrapper.c", "optimize_handler_wrapper.c"}, asm_files); // call the op function nnacl::NNaclInt8Serializer code; @@ -202,7 +204,6 @@ int Conv2DINT8Coder::DoCode(CoderContext *const context) { code.CodeBaseStruct("ConvolutionInt8Args", kRunArgs, input_tensor_, packed_input_, matmul_packed_input_, packed_weight_, bias_data_, output_tensor_, filter_zp_ptr_, input_sum_, "(ConvParameter *)&conv_param", matmul_func_, support_optimize_); - code.CodeFunction("CheckSupportOptimize", kRunArgsAddr); if (support_parallel_) { code.CodeFunction(kParallelLaunch, gThreadPool, "ConvolutionInt8Run", kRunArgsAddr, gThreadNum); } else { diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.h index 6f3b0e233d..c34ccf670b 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.h +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.h @@ -44,10 +44,8 @@ class Conv2DINT8Coder final : public Conv2DBaseCoder { } private: - int InitWeightBias(CoderContext *ctx); - void CheckSupportOptimize(); - + int InitWeightBias(CoderContext *ctx); int InitTmpBuffer(CoderContext *ctx); int Resize(); @@ -70,7 +68,7 @@ class Conv2DINT8Coder final : public Conv2DBaseCoder { int32_t *input_sum_{nullptr}; int8_t *matmul_packed_input_{nullptr}; - std::string matmul_func_; + std::string matmul_func_{"NULL"}; std::function pack_weight_init_{nullptr}; }; diff --git a/mindspore/lite/micro/coder/opcoders/serializers/serializer.h b/mindspore/lite/micro/coder/opcoders/serializers/serializer.h index 6a5401c1d2..9938a82387 100644 --- a/mindspore/lite/micro/coder/opcoders/serializers/serializer.h +++ b/mindspore/lite/micro/coder/opcoders/serializers/serializer.h @@ -168,9 +168,13 @@ class 
Serializer { * "int pointer_gen[4] = {1 ,3, 2, 42};\n * const Foo foo_gen = {{1, 2, 3}, pointer_gen, 4};\n" */ - template + template void CodeBaseStruct(const std::string &type, const std::string &name, PARAMETERS... parameters) { - code << "const " << type << " " << name << " = {"; + if constexpr (immutable) { + code << "const " << type << " " << name << " = {"; + } else { + code << type << " " << name << " = {"; + } GenCode(parameters...); code << "};\n"; } diff --git a/mindspore/lite/micro/coder/operator_library/CMakeLists.txt b/mindspore/lite/micro/coder/operator_library/CMakeLists.txt index c86d5f7fd3..fe7cb4b7a2 100644 --- a/mindspore/lite/micro/coder/operator_library/CMakeLists.txt +++ b/mindspore/lite/micro/coder/operator_library/CMakeLists.txt @@ -22,7 +22,6 @@ endif() set(MICRO_CMAKE_PATH ${MICRO_DIR}/cmake) set(OPERATOR_LIBRARY_PATH ${CMAKE_BINARY_DIR}/operator_library) set(HEADER_PATH "${OPERATOR_LIBRARY_PATH}/include") -set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/x86") message("===========>start to pack operators' head file") file(REMOVE_RECURSE ${OPERATOR_LIBRARY_PATH}) @@ -36,14 +35,31 @@ file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/assembly) file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp16) file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp16_grad) file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp32_grad) -file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/intrinsics) file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/optimize) +if(PLATFORM_ARM64) + set(MICRO_BUILD_ARM64 ON) +endif() +if(PLATFORM_ARM32) + set(MICRO_BUILD_ARM32A ON) +endif() + include(${MICRO_CMAKE_PATH}/package_android.cmake) include(${MICRO_CMAKE_PATH}/package_nnacl.cmake) include(${MICRO_CMAKE_PATH}/package_cmsis.cmake) include(${MICRO_CMAKE_PATH}/package_wrapper.cmake) +list(APPEND OP_FILES ${NNACL_OPS} ${WRAPPER_SRC} ${RUNTIME_SRC}) + +if(PLATFORM_ARM64) + set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/arm64") +elseif(PLATFORM_ARM32) + set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/arm32a") +else() + set(LIB_PATH 
"${OPERATOR_LIBRARY_PATH}/lib/x86") + list(APPEND OP_FILES ${CMSIS_OPS}) +endif() + # generate static library -add_library(ops STATIC ${NNACL_OPS} ${CMSIS_OPS} ${WRAPPER_SRC} ${RUNTIME_SRC}) +add_library(ops STATIC ${OP_FILES}) install(TARGETS ops ARCHIVE DESTINATION ${LIB_PATH}) diff --git a/mindspore/lite/micro/coder/operator_library/wrapper/base/common_wrapper.c b/mindspore/lite/micro/coder/operator_library/wrapper/base/common_wrapper.c new file mode 100644 index 0000000000..014a80d2ca --- /dev/null +++ b/mindspore/lite/micro/coder/operator_library/wrapper/base/common_wrapper.c @@ -0,0 +1,36 @@ +/* + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "wrapper/base/common_wrapper.h" +#ifdef __ANDROID__ +#include <sys/auxv.h> +#include <asm/hwcap.h> +#endif + +bool GetSupportOptFlag() { + bool status = false; +#ifdef ENABLE_ARM64 + int hwcap_type = 16; + // getHwCap + uint32_t hwcap = getauxval(hwcap_type); + if (hwcap & HWCAP_ASIMDDP) { + status = true; + } else { + status = false; + } +#endif + return status; +} diff --git a/mindspore/lite/micro/coder/operator_library/wrapper/base/common_wrapper.h b/mindspore/lite/micro/coder/operator_library/wrapper/base/common_wrapper.h new file mode 100644 index 0000000000..a68fdf8240 --- /dev/null +++ b/mindspore/lite/micro/coder/operator_library/wrapper/base/common_wrapper.h @@ -0,0 +1,24 @@ +/* + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_ +#define MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_ + +#include "nnacl/op_base.h" + +bool GetSupportOptFlag(); + +#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_ diff --git a/mindspore/lite/micro/coder/operator_library/wrapper/base/optimize_handler_wrapper.c b/mindspore/lite/micro/coder/operator_library/wrapper/base/optimize_handler_wrapper.c new file mode 100644 index 0000000000..bee2c6e35e --- /dev/null +++ b/mindspore/lite/micro/coder/operator_library/wrapper/base/optimize_handler_wrapper.c @@ -0,0 +1,49 @@ +/* + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "wrapper/base/optimize_handler_wrapper.h" + +extern void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16, + const int *input_sum, const int *bias); +extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, + const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, + int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride, + size_t peroc); +extern void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4, + const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier, + int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp); + +#ifdef ENABLE_ARM64 +void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16, + const int *input_sum, const int *bias) { + return MatMulOptR4Int8Neon64(a, b, dst, row4, col4, deep16, input_sum, bias); +} + +void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel) { + return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi, + output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel); +} +void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel, int32_t *filter_zp) { + return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, 
mini, maxi, output_zp, multiplier, left_shift, + right_shift, stride, per_channel, filter_zp); +} +#endif diff --git a/mindspore/lite/micro/coder/operator_library/wrapper/base/optimize_handler_wrapper.h b/mindspore/lite/micro/coder/operator_library/wrapper/base/optimize_handler_wrapper.h new file mode 100644 index 0000000000..40e82acbab --- /dev/null +++ b/mindspore/lite/micro/coder/operator_library/wrapper/base/optimize_handler_wrapper.h @@ -0,0 +1,41 @@ +/* + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_ +#define MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_ + +#include "nnacl/op_base.h" + +#ifdef ENABLE_ARM64 +void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias, + size_t ksize, size_t ic4, size_t output_channel, size_t offset, + const int32_t *input_sum, size_t act_min, size_t act_max, size_t out_zp, + int32_t *out_multiplier, int32_t *shift_before, int32_t *shift_after, + size_t asymmetric, size_t per_channel, size_t per_channel_offset); +void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16, + const int *input_sum, const int *bias); + +void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel); +void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, + size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, + int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel, int32_t *filter_zp); +#endif + +#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_ diff --git a/mindspore/lite/micro/coder/operator_library/wrapper/fp32/matmul_fp32_wrapper.c b/mindspore/lite/micro/coder/operator_library/wrapper/fp32/matmul_fp32_wrapper.c index db505679c0..50dbab12dd 100644 --- a/mindspore/lite/micro/coder/operator_library/wrapper/fp32/matmul_fp32_wrapper.c +++ b/mindspore/lite/micro/coder/operator_library/wrapper/fp32/matmul_fp32_wrapper.c @@ -22,21 +22,12 @@ void InitMatrixA(const float *src_ptr, float *dst_ptr, const 
MatMulParameter *pa } for (int i = 0; i < params_->batch; i++) { const float *src = src_ptr + i * params_->deep_ * params_->row_; -#ifdef ENABLE_ARM32 - float *dst = dst_ptr + i * params_->deep_ * params_->row_4_; - if (params_->a_transpose_) { - RowMajor2Row4Major(src, dst, params_->deep_, params_->row_); - } else { - RowMajor2Col4Major(src, dst, params_->row_, params_->deep_); - } -#else - float *dst = dst_ptr + i * params_->deep_ * params_->row_12_; + float *dst = dst_ptr + i * params_->deep_ * params_->row_align_; if (params_->a_transpose_) { RowMajor2Row12Major(src, dst, params_->deep_, params_->row_); } else { RowMajor2Col12Major(src, dst, params_->row_, params_->deep_); } -#endif } } @@ -55,11 +46,19 @@ void InitMatrixB(const float *src_ptr, float *dst_ptr, const MatMulParameter *pa } for (int i = 0; i < params_->batch; i++) { const float *src = src_ptr + i * params_->deep_ * params_->col_; - float *dst = dst_ptr + i * params_->deep_ * params_->col_8_; + float *dst = dst_ptr + i * params_->deep_ * params_->col_align_; +#ifdef ENABLE_ARM32 + if (params_->b_transpose_) { + RowMajor2Col4Major(src, dst, params_->col_, params_->deep_); + } else { + RowMajor2Row4Major(src, dst, params_->deep_, params_->col_); + } +#else if (params_->b_transpose_) { RowMajor2Col8Major(src, dst, params_->col_, params_->deep_); } else { RowMajor2Row8Major(src, dst, params_->deep_, params_->col_); } +#endif } } diff --git a/mindspore/lite/micro/coder/operator_library/wrapper/int8/convolution_int8_wrapper.c b/mindspore/lite/micro/coder/operator_library/wrapper/int8/convolution_int8_wrapper.c index 3f916829bf..48f5ccf778 100644 --- a/mindspore/lite/micro/coder/operator_library/wrapper/int8/convolution_int8_wrapper.c +++ b/mindspore/lite/micro/coder/operator_library/wrapper/int8/convolution_int8_wrapper.c @@ -16,24 +16,6 @@ #include "wrapper/int8/convolution_int8_wrapper.h" -void CheckSupportOptimize(const ConvolutionInt8Args *args) { - int tile_num = 8; -#ifdef ENABLE_ARM32 - tile_num = 
4; - args->is_optimize_ = false; -#endif -#ifdef ENABLE_ARM64 - if (mindspore::lite::IsSupportSDot()) { - matmul_func_ = MatMulRInt8_optimize_handler; - args->is_optimize_ = true; - } else { - tile_num = 4; - args->is_optimize_ = false; - } -#endif - args->conv_param_->tile_num_ = tile_num; -} - int ConvolutionInt8Run(void *cdata, int task_id) { ConvolutionInt8Args *args = (ConvolutionInt8Args *)cdata; ConvInt8(args->input_data_, args->packed_input_, args->matmul_input_, args->packed_weight_, args->bias_data_, diff --git a/mindspore/lite/micro/coder/operator_library/wrapper/int8/convolution_int8_wrapper.h b/mindspore/lite/micro/coder/operator_library/wrapper/int8/convolution_int8_wrapper.h index ec19d41aa1..082ccf4156 100644 --- a/mindspore/lite/micro/coder/operator_library/wrapper/int8/convolution_int8_wrapper.h +++ b/mindspore/lite/micro/coder/operator_library/wrapper/int8/convolution_int8_wrapper.h @@ -36,8 +36,6 @@ typedef struct { bool is_optimize_; } ConvolutionInt8Args; -void CheckSupportOptimize(const ConvolutionInt8Args *args); - int ConvolutionInt8Run(void *cdata, int task_id); #endif // MINDSPORE_LITE_MICRO_INT8_CONVOLUTION_WRAPPER_INT8_WRAPPER_H_