isolate FP16 compilation

pull/11182/head
zengxianglong 4 years ago
parent 6899c46ffd
commit 51b2997ca6

@@ -551,7 +551,7 @@ build_lite()
     cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
           -DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang" \
           -DANDROID_STL=${ANDROID_STL} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
-          -DPLATFORM_ARM64=on -DENABLE_NEON=on -DENABLE_FP16="off" \
+          -DPLATFORM_ARM64=on -DENABLE_NEON=on -DENABLE_FP16="on" \
          -DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \
          -DSUPPORT_GPU=${LITE_ENABLE_GPU} -DSUPPORT_NPU=${LITE_ENABLE_NPU} -DENABLE_V0=on \
          -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
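The -DENABLE_FP16 value set here becomes a compile definition, and every guard in the hunks below keys on it. A minimal, self-contained sketch of the mechanism (illustrative code, not from the MindSpore tree):

    #include <iostream>

    // Build with "g++ -DENABLE_FP16 demo.cc" (flag on) or "g++ demo.cc" (flag off).
    #ifdef ENABLE_FP16
    const char *PrecisionMode() { return "fp16 kernels compiled in"; }
    #else
    const char *PrecisionMode() { return "fp32 only"; }
    #endif

    int main() {
      std::cout << PrecisionMode() << std::endl;
      return 0;
    }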

@@ -23,4 +23,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
 add_library(nnacl_optimize_mid OBJECT ${SDOT_FILES})
-add_library(nnacl_fp16_mid OBJECT ${FP16_FILES})
+if (ENABLE_FP16)
+    add_library(nnacl_fp16_mid OBJECT ${FP16_FILES})
+endif ()

@@ -128,8 +128,9 @@ endif ()
 if (PLATFORM_ARM64)
     target_link_libraries(mindspore-lite cpu_opt_kernel_mid nnacl_optimize_mid)
     target_link_libraries(mindspore-lite_static cpu_opt_kernel_mid nnacl_optimize_mid)
+    if (ENABLE_FP16)
+        target_link_libraries(mindspore-lite cpu_fp16_kernel_mid nnacl_fp16_mid)
+        target_link_libraries(mindspore-lite_static cpu_fp16_kernel_mid nnacl_fp16_mid)
+    endif ()
 endif ()

@@ -9,16 +9,18 @@ file(GLOB KERNEL_SRC
 list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
 if (SUPPORT_TRAIN)
-file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
-set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
+    file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
+    set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
 endif()
 add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC})
 add_dependencies(cpu_kernel_mid fbs_src)
 if (PLATFORM_ARM64)
+    if (ENABLE_FP16)
     file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc)
     add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC})
+    endif ()
     file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
     add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC})
 endif ()
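The three CMake hunks above share one pattern: the FP16 object libraries (nnacl_fp16_mid, cpu_fp16_kernel_mid) are compiled and linked only when ENABLE_FP16 is set. The source-level guards in the hunks below must agree with that, since an unguarded call into a symbol from fp16/*.cc would fail to link in a non-FP16 build. An illustrative sketch of the coupling (hypothetical names, not MindSpore code):

    #include <iostream>

    #ifdef ENABLE_FP16
    // In the real tree this would live in fp16/*.cc and be compiled only when
    // the flag is on; it is defined inline here so the sketch builds either way.
    float HalfPrecisionSum(float a, float b) { return a + b; }
    #endif

    float Sum(float a, float b) {
    #ifdef ENABLE_FP16
      return HalfPrecisionSum(a, b);  // FP16 kernel path
    #else
      return a + b;  // FP32 fallback when the FP16 objects are excluded
    #endif
    }

    int main() {
      std::cout << Sum(1.5f, 2.5f) << std::endl;  // prints 4
      return 0;
    }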

@@ -474,9 +474,14 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel
 #endif
   }
   if (type == kernel::kCpuFP16SubGraph) {
+#ifdef ENABLE_FP16
     auto sub_kernel = new (std::nothrow)
       kernel::CpuFp16SubGraph(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_);
     return sub_kernel;
+#else
+    MS_LOG(ERROR) << "FP16 subgraph is not supported!";
+    return nullptr;
+#endif
   }
   if (type == kernel::kCpuFP32SubGraph) {
     auto sub_kernel = new (std::nothrow)
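With the guard above, a build without FP16 still handles a request for an FP16 subgraph at runtime: it logs an error and returns nullptr instead of failing to compile. A simplified sketch of that contract (stand-in types, not MindSpore code); note that new (std::nothrow) also returns nullptr on allocation failure, so callers check one condition for both cases:

    #include <iostream>
    #include <new>

    struct SubGraphKernel {
      int type;
    };

    SubGraphKernel *CreateSubGraphKernel(int type) {
      if (type == 16) {  // stand-in for kernel::kCpuFP16SubGraph
    #ifdef ENABLE_FP16
        return new (std::nothrow) SubGraphKernel{type};
    #else
        std::cerr << "FP16 subgraph is not supported!" << std::endl;
        return nullptr;
    #endif
      }
      return new (std::nothrow) SubGraphKernel{type};  // FP32 and other paths
    }

    int main() {
      SubGraphKernel *sub = CreateSubGraphKernel(16);
      std::cout << (sub != nullptr ? "created" : "rejected") << std::endl;
      delete sub;  // deleting nullptr is a no-op
      return 0;
    }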

@@ -16,8 +16,7 @@
 #include "src/sub_graph_kernel.h"
 #include "src/tensor.h"
-#ifdef ENABLE_ARM64
-#include "src/common/utils.h"
+#if defined(ENABLE_ARM64) && defined(ENABLE_FP16)
 #include "src/runtime/kernel/arm/fp16/fp16_op_handler.h"
 #endif
@@ -175,6 +174,7 @@ int CpuSubGraph::Prepare() {
   return RET_OK;
 }
+#ifdef ENABLE_FP16
 void CpuFp16SubGraph::FreeOriginInputData() {
   for (auto *data_store : this->origin_input_data_) {
     if (data_store == nullptr) {
@@ -300,4 +300,5 @@ int CpuFp16SubGraph::PostProcess() {
   return RET_OK;
 #endif
 }
+#endif
 }  // namespace mindspore::kernel

@@ -157,6 +157,7 @@ class CpuFp32SubGraph : public CpuSubGraph {
   int PostProcess() override { return CpuSubGraph::PostProcess(); }
 };
+#ifdef ENABLE_FP16
 class CpuFp16SubGraph : public CpuSubGraph {
  public:
   CpuFp16SubGraph(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
@@ -182,5 +183,6 @@ class CpuFp16SubGraph : public CpuSubGraph {
  private:
   std::vector<DataStore *> origin_input_data_{};
 };
+#endif
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_SUB_GRAPH_H
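The sub_graph_kernel.cc and sub_graph_kernel.h hunks pair up: the CpuFp16SubGraph implementation and its declaration sit under the same ENABLE_FP16 guard, while the FP32 classes remain unconditional. A compact sketch of that shape (illustrative names, not the real class hierarchy):

    #include <iostream>

    class CpuSubGraphSketch {
     public:
      virtual ~CpuSubGraphSketch() = default;
      virtual const char *Precision() const { return "fp32"; }
    };

    #ifdef ENABLE_FP16
    // Declared and defined only when FP16 support is compiled in; any code
    // that names this class must sit under the same guard.
    class CpuFp16SubGraphSketch : public CpuSubGraphSketch {
     public:
      const char *Precision() const override { return "fp16"; }
    };
    #endif

    int main() {
      CpuSubGraphSketch base;
      std::cout << base.Precision() << std::endl;  // always available
      return 0;
    }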

@@ -288,15 +288,19 @@ if (SUPPORT_GPU)
 endif()
 if (ENABLE_FP16)
+    file(GLOB_RECURSE TEST_CASE_KERNEL_FP16_SRC
+        ${TEST_DIR}/ut/src/runtime/kernel/arm/fp16/*.cc
+    )
     set(TEST_SRC
         ${TEST_SRC}
-        ${TEST_DIR}/ut/src/runtime/kernel/arm/fp16/convolution_fp16_tests.cc)
+        ${TEST_CASE_KERNEL_FP16_SRC}
+    )
 endif ()
 add_executable(lite-test ${TEST_SRC})
 add_dependencies(lite-test fbs_src)
 target_link_libraries(lite-test dl mindspore::gtest)
-if (PLATFORM_ARM64)
+if (PLATFORM_ARM64 AND ENABLE_FP16)
     target_link_libraries(lite-test nnacl_fp16_mid nnacl_optimize_mid)
 endif()
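On the test side the switch is applied twice: the hunk above globs the whole fp16 test directory into TEST_SRC only when ENABLE_FP16 is on (replacing the single hard-coded convolution test), and, as the next hunk shows, individual test files guard their FP16 includes as well. A sketch of a guarded test (assumes gtest, as linked via mindspore::gtest, plus gtest_main to run):

    #include <gtest/gtest.h>

    #ifdef ENABLE_FP16
    // Present in lite-test only for FP16 builds, mirroring the CMake-level
    // exclusion of the fp16 test sources. The body is a placeholder.
    TEST(Fp16GuardSketch, OnlyBuiltWithFp16) { EXPECT_EQ(2, 1 + 1); }
    #endif

    TEST(Fp32Path, AlwaysBuilt) { EXPECT_FLOAT_EQ(4.0f, 1.5f + 2.5f); }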

@@ -20,6 +20,9 @@
 #include "common/common_test.h"
 #include "mindspore/lite/src/common/file_utils.h"
 #include "mindspore/lite/nnacl/pack.h"
+#ifdef ENABLE_FP16
+#include "mindspore/lite/nnacl/fp16/pack_fp16.h"
+#endif
 namespace mindspore {
 class TestPack : public mindspore::CommonTest {

@@ -71,7 +71,7 @@ void TestReduceFp16::Prepare(const std::vector<int> &input_shape, const std::vec
   desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat16, schema::PrimitiveType_Reduce};
   ctx_ = lite::InnerContext();
   ctx_.thread_num_ = thread_num;
-  ASSERT_EQ(lite::RET_OK, context->Init());
+  ASSERT_EQ(lite::RET_OK, ctx_.Init());
   creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc);
   ASSERT_NE(creator_, nullptr);
   kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(&param_), &ctx_, desc, nullptr);
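The last hunk fixes a bug in the FP16 reduce test, likely surfaced once the new GLOB compiled every fp16 test file: Init() was asserted on a stray pointer named context while ctx_, the member actually handed to the kernel creator, was never initialized. Reduced to its essence (simplified stand-in for lite::InnerContext):

    #include <iostream>

    struct InnerContextSketch {
      bool initialized = false;
      int Init() {
        initialized = true;
        return 0;  // analogue of lite::RET_OK
      }
    };

    int main() {
      InnerContextSketch ctx_;
      if (ctx_.Init() != 0) {  // the fix: initialize ctx_ itself
        return 1;
      }
      std::cout << std::boolalpha << ctx_.initialized << std::endl;  // prints true
      return 0;
    }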
