isolate FP16 compilation

pull/11182/head
zengxianglong 4 years ago
parent 6899c46ffd
commit 51b2997ca6

@@ -551,7 +551,7 @@ build_lite()
     cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
           -DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang" \
           -DANDROID_STL=${ANDROID_STL} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
-          -DPLATFORM_ARM64=on -DENABLE_NEON=on -DENABLE_FP16="off" \
+          -DPLATFORM_ARM64=on -DENABLE_NEON=on -DENABLE_FP16="on" \
          -DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \
          -DSUPPORT_GPU=${LITE_ENABLE_GPU} -DSUPPORT_NPU=${LITE_ENABLE_NPU} -DENABLE_V0=on \
          -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
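The -DENABLE_FP16 value set here becomes a compile definition, and every guard in the hunks below keys on it. A minimal, self-contained sketch of the mechanism (illustrative code, not from the MindSpore tree):

    #include <iostream>

    // Build with "g++ -DENABLE_FP16 demo.cc" (flag on) or "g++ demo.cc" (flag off).
    #ifdef ENABLE_FP16
    const char *PrecisionMode() { return "fp16 kernels compiled in"; }
    #else
    const char *PrecisionMode() { return "fp32 only"; }
    #endif

    int main() {
      std::cout << PrecisionMode() << std::endl;
      return 0;
    }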

@@ -23,4 +23,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
 add_library(nnacl_optimize_mid OBJECT ${SDOT_FILES})
-add_library(nnacl_fp16_mid OBJECT ${FP16_FILES})
+if (ENABLE_FP16)
+    add_library(nnacl_fp16_mid OBJECT ${FP16_FILES})
+endif ()

@@ -128,8 +128,9 @@ endif ()
 if (PLATFORM_ARM64)
     target_link_libraries(mindspore-lite cpu_opt_kernel_mid nnacl_optimize_mid)
     target_link_libraries(mindspore-lite_static cpu_opt_kernel_mid nnacl_optimize_mid)
+    if (ENABLE_FP16)
+        target_link_libraries(mindspore-lite cpu_fp16_kernel_mid nnacl_fp16_mid)
+        target_link_libraries(mindspore-lite_static cpu_fp16_kernel_mid nnacl_fp16_mid)
+    endif ()
 endif ()

@@ -9,16 +9,18 @@ file(GLOB KERNEL_SRC
 list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
 if (SUPPORT_TRAIN)
-file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
-set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
+    file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
+    set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
 endif()
 add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC})
 add_dependencies(cpu_kernel_mid fbs_src)
 if (PLATFORM_ARM64)
+    if (ENABLE_FP16)
     file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc)
     add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC})
+    endif ()
     file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
     add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC})
 endif ()
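The three CMake hunks above share one pattern: the FP16 object libraries (nnacl_fp16_mid, cpu_fp16_kernel_mid) are compiled and linked only when ENABLE_FP16 is set. The source-level guards in the hunks below must agree with that, since an unguarded call into a symbol from fp16/*.cc would fail to link in a non-FP16 build. An illustrative sketch of the coupling (hypothetical names, not MindSpore code):

    #include <iostream>

    #ifdef ENABLE_FP16
    // In the real tree this would live in fp16/*.cc and be compiled only when
    // the flag is on; it is defined inline here so the sketch builds either way.
    float HalfPrecisionSum(float a, float b) { return a + b; }
    #endif

    float Sum(float a, float b) {
    #ifdef ENABLE_FP16
      return HalfPrecisionSum(a, b);  // FP16 kernel path
    #else
      return a + b;  // FP32 fallback when the FP16 objects are excluded
    #endif
    }

    int main() {
      std::cout << Sum(1.5f, 2.5f) << std::endl;  // prints 4
      return 0;
    }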

@@ -474,9 +474,14 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel
 #endif
   }
   if (type == kernel::kCpuFP16SubGraph) {
+#ifdef ENABLE_FP16
     auto sub_kernel = new (std::nothrow)
       kernel::CpuFp16SubGraph(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_);
     return sub_kernel;
+#else
+    MS_LOG(ERROR) << "FP16 subgraph is not supported!";
+    return nullptr;
+#endif
   }
   if (type == kernel::kCpuFP32SubGraph) {
     auto sub_kernel = new (std::nothrow)
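With the guard above, a build without FP16 still handles a request for an FP16 subgraph at runtime: it logs an error and returns nullptr instead of failing to compile. A simplified sketch of that contract (stand-in types, not MindSpore code); note that new (std::nothrow) also returns nullptr on allocation failure, so callers check one condition for both cases:

    #include <iostream>
    #include <new>

    struct SubGraphKernel {
      int type;
    };

    SubGraphKernel *CreateSubGraphKernel(int type) {
      if (type == 16) {  // stand-in for kernel::kCpuFP16SubGraph
    #ifdef ENABLE_FP16
        return new (std::nothrow) SubGraphKernel{type};
    #else
        std::cerr << "FP16 subgraph is not supported!" << std::endl;
        return nullptr;
    #endif
      }
      return new (std::nothrow) SubGraphKernel{type};  // FP32 and other paths
    }

    int main() {
      SubGraphKernel *sub = CreateSubGraphKernel(16);
      std::cout << (sub != nullptr ? "created" : "rejected") << std::endl;
      delete sub;  // deleting nullptr is a no-op
      return 0;
    }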

@@ -16,8 +16,7 @@
 #include "src/sub_graph_kernel.h"
 #include "src/tensor.h"
-#ifdef ENABLE_ARM64
-#include "src/common/utils.h"
+#if defined(ENABLE_ARM64) && defined(ENABLE_FP16)
 #include "src/runtime/kernel/arm/fp16/fp16_op_handler.h"
 #endif
@@ -175,6 +174,7 @@ int CpuSubGraph::Prepare() {
   return RET_OK;
 }
+#ifdef ENABLE_FP16
 void CpuFp16SubGraph::FreeOriginInputData() {
   for (auto *data_store : this->origin_input_data_) {
     if (data_store == nullptr) {
@@ -300,4 +300,5 @@ int CpuFp16SubGraph::PostProcess() {
   return RET_OK;
 #endif
 }
+#endif
 }  // namespace mindspore::kernel

@@ -157,6 +157,7 @@ class CpuFp32SubGraph : public CpuSubGraph {
   int PostProcess() override { return CpuSubGraph::PostProcess(); }
 };
+#ifdef ENABLE_FP16
 class CpuFp16SubGraph : public CpuSubGraph {
  public:
   CpuFp16SubGraph(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
@@ -182,5 +183,6 @@ class CpuFp16SubGraph : public CpuSubGraph {
  private:
   std::vector<DataStore *> origin_input_data_{};
 };
+#endif
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_SUB_GRAPH_H
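The sub_graph_kernel.cc and sub_graph_kernel.h hunks pair up: the CpuFp16SubGraph implementation and its declaration sit under the same ENABLE_FP16 guard, while the FP32 classes remain unconditional. A compact sketch of that shape (illustrative names, not the real class hierarchy):

    #include <iostream>

    class CpuSubGraphSketch {
     public:
      virtual ~CpuSubGraphSketch() = default;
      virtual const char *Precision() const { return "fp32"; }
    };

    #ifdef ENABLE_FP16
    // Declared and defined only when FP16 support is compiled in; any code
    // that names this class must sit under the same guard.
    class CpuFp16SubGraphSketch : public CpuSubGraphSketch {
     public:
      const char *Precision() const override { return "fp16"; }
    };
    #endif

    int main() {
      CpuSubGraphSketch base;
      std::cout << base.Precision() << std::endl;  // always available
      return 0;
    }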

@@ -288,15 +288,19 @@ if (SUPPORT_GPU)
 endif()
 if (ENABLE_FP16)
+    file(GLOB_RECURSE TEST_CASE_KERNEL_FP16_SRC
+        ${TEST_DIR}/ut/src/runtime/kernel/arm/fp16/*.cc
+    )
     set(TEST_SRC
         ${TEST_SRC}
-        ${TEST_DIR}/ut/src/runtime/kernel/arm/fp16/convolution_fp16_tests.cc)
+        ${TEST_CASE_KERNEL_FP16_SRC}
+    )
 endif ()
 add_executable(lite-test ${TEST_SRC})
 add_dependencies(lite-test fbs_src)
 target_link_libraries(lite-test dl mindspore::gtest)
-if (PLATFORM_ARM64)
+if (PLATFORM_ARM64 AND ENABLE_FP16)
     target_link_libraries(lite-test nnacl_fp16_mid nnacl_optimize_mid)
 endif()
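On the test side the switch is applied twice: the hunk above globs the whole fp16 test directory into TEST_SRC only when ENABLE_FP16 is on (replacing the single hard-coded convolution test), and, as the next hunk shows, individual test files guard their FP16 includes as well. A sketch of a guarded test (assumes gtest, as linked via mindspore::gtest, plus gtest_main to run):

    #include <gtest/gtest.h>

    #ifdef ENABLE_FP16
    // Present in lite-test only for FP16 builds, mirroring the CMake-level
    // exclusion of the fp16 test sources. The body is a placeholder.
    TEST(Fp16GuardSketch, OnlyBuiltWithFp16) { EXPECT_EQ(2, 1 + 1); }
    #endif

    TEST(Fp32Path, AlwaysBuilt) { EXPECT_FLOAT_EQ(4.0f, 1.5f + 2.5f); }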

@@ -20,6 +20,9 @@
 #include "common/common_test.h"
 #include "mindspore/lite/src/common/file_utils.h"
 #include "mindspore/lite/nnacl/pack.h"
+#ifdef ENABLE_FP16
+#include "mindspore/lite/nnacl/fp16/pack_fp16.h"
+#endif
 namespace mindspore {
 class TestPack : public mindspore::CommonTest {

@@ -71,7 +71,7 @@ void TestReduceFp16::Prepare(const std::vector<int> &input_shape, const std::vec
   desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat16, schema::PrimitiveType_Reduce};
   ctx_ = lite::InnerContext();
   ctx_.thread_num_ = thread_num;
-  ASSERT_EQ(lite::RET_OK, context->Init());
+  ASSERT_EQ(lite::RET_OK, ctx_.Init());
   creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc);
   ASSERT_NE(creator_, nullptr);
   kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(&param_), &ctx_, desc, nullptr);
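The last hunk fixes a bug in the FP16 reduce test, likely surfaced once the new GLOB compiled every fp16 test file: Init() was asserted on a stray pointer named context while ctx_, the member actually handed to the kernel creator, was never initialized. Reduced to its essence (simplified stand-in for lite::InnerContext):

    #include <iostream>

    struct InnerContextSketch {
      bool initialized = false;
      int Init() {
        initialized = true;
        return 0;  // analogue of lite::RET_OK
      }
    };

    int main() {
      InnerContextSketch ctx_;
      if (ctx_.Init() != 0) {  // the fix: initialize ctx_ itself
        return 1;
      }
      std::cout << std::boolalpha << ctx_.initialized << std::endl;  // prints true
      return 0;
    }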
