commit 990d06f302 (pull/10435/MERGE)
From: @yeyunpeng2020
Reviewed-by: @zhanghaibo5, @HilbertDavid
Signed-off-by: @zhanghaibo5
Committed-by: mindspore-ci-bot (via Gitee)

@@ -34,7 +34,7 @@ typedef enum {
 typedef enum {
   DT_CPU, /**< CPU device type */
   DT_GPU, /**< GPU device type */
-  DT_NPU  /**< NPU device type, not supported yet */
+  DT_NPU  /**< NPU device type */
 } DeviceType;
 /// \brief CpuDeviceInfo defined for CPU's configuration information.

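With the "not supported yet" caveat dropped, DT_NPU becomes a selectable device. A minimal sketch of requesting it through the lite context, assuming the MindSpore Lite 1.x C++ API where Context carries a device_list_ of DeviceContext entries with a device_type_ field (check your include/context.h):

    #include "include/context.h"

    mindspore::lite::Context CreateNpuContext() {
      mindspore::lite::Context ctx;             // typically starts with a default CPU entry in 1.x releases
      mindspore::lite::DeviceContext npu_device;
      npu_device.device_type_ = mindspore::lite::DT_NPU;
      ctx.device_list_.push_back(npu_device);   // NPU is used for supported subgraphs; CPU stays as fallback
      return ctx;
    }
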
@@ -88,10 +88,11 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
     if (std::find(trans_tensors.begin(), trans_tensors.end(), out_tensors[i]) != trans_tensors.end()) {
       // Change data & tensor shape NCHW -> NHWC
-      PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(),
-                         out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel());
-      out_tensors[i]->set_shape({out_tensors[i]->shape()[0], out_tensors[i]->shape()[2], out_tensors[i]->shape()[3],
-                                 out_tensors[i]->shape()[1]});
+      PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data,
+                         npu_output_tensors_[i]->GetTensorDimension().GetNumber(),
+                         npu_output_tensors_[i]->GetTensorDimension().GetWidth() *
+                           npu_output_tensors_[i]->GetTensorDimension().GetHeight(),
+                         npu_output_tensors_[i]->GetTensorDimension().GetChannel());
     } else {
       memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
       out_tensors[i]->ResetRefCount();

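The change above takes batch/plane/channel from the NPU tensor's own GetTensorDimension() rather than from out_tensors[i], so the repack no longer depends on the lite tensor's shape being stored as NCHW, and the set_shape fixup becomes unnecessary. For reference, a self-contained sketch of the layout transform itself (an illustration of the semantics, not the library's PackNCHWToNHWCFp32 implementation):

    #include <cstddef>

    // Copy src (NCHW, with H*W flattened to "plane") into dst laid out as NHWC.
    // Parameter order matches the (buffer, out, batch, plane, channel) call above.
    void PackNCHWToNHWCFp32Sketch(const float *src, float *dst,
                                  size_t batch, size_t plane, size_t channel) {
      for (size_t n = 0; n < batch; ++n) {
        const float *src_batch = src + n * channel * plane;
        float *dst_batch = dst + n * plane * channel;
        for (size_t c = 0; c < channel; ++c) {
          for (size_t hw = 0; hw < plane; ++hw) {
            dst_batch[hw * channel + c] = src_batch[c * plane + hw];
          }
        }
      }
    }
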
@@ -47,7 +47,7 @@ bool NPUManager::CheckEMUIVersion() {
   int pos = emui_str.find('_');
   if (pos != std::string::npos) {
     auto version = emui_str.substr(pos + 1);
-    int ret = CompareVersion(version, "11.0.0");
+    int ret = CompareVersion(version, "10.0.0");
     if (ret < 0) {
       return false;
     }
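
The EMUI gate relies on CompareVersion ordering dotted version strings; the change lowers the minimum from EMUI 11 to EMUI 10, and devices where CompareVersion(version, "10.0.0") < 0 are rejected. A minimal sketch of such a comparison (the project's own CompareVersion may differ in detail):

    #include <sstream>
    #include <string>

    // Returns <0, 0, >0 when v1 is lower than, equal to, or higher than v2,
    // comparing dot-separated numeric fields left to right (e.g. "10.0.1" vs "10.0").
    int CompareVersionSketch(const std::string &v1, const std::string &v2) {
      std::istringstream s1(v1), s2(v2);
      std::string f1, f2;
      bool more1 = true, more2 = true;
      while (more1 || more2) {
        more1 = static_cast<bool>(std::getline(s1, f1, '.'));
        more2 = static_cast<bool>(std::getline(s2, f2, '.'));
        int n1 = more1 ? std::stoi(f1) : 0;  // missing trailing fields count as 0
        int n2 = more2 ? std::stoi(f2) : 0;
        if (n1 != n2) return n1 < n2 ? -1 : 1;
      }
      return 0;
    }
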
@@ -138,6 +138,7 @@ int NPUManager::AddModel(domi::ModelBufferData *model_buffer_data, const std::st
   index_++;
   return RET_OK;
 }
+
 std::shared_ptr<hiai::AiModelMngerClient> NPUManager::CreateAiModelMngerClient() {
   auto client = std::make_shared<hiai::AiModelMngerClient>();
   if (client == nullptr) {
@@ -151,6 +152,7 @@ std::shared_ptr<hiai::AiModelMngerClient> NPUManager::CreateAiModelMngerClient()
   }
   return client;
 }
+
 int NPUManager::LoadOMModel() {
   std::vector<std::shared_ptr<hiai::AiModelDescription>> models_desc;
   std::shared_ptr<hiai::AiModelMngerClient> client = nullptr;

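The two hunks above show client construction and initialization factored into CreateAiModelMngerClient so that AddModel bookkeeping and LoadOMModel share one code path. A sketch of that pattern, with a hypothetical Client type standing in for hiai::AiModelMngerClient (whose real Init signature is not shown in this diff):

    #include <iostream>
    #include <memory>

    struct Client {                 // hypothetical stand-in for hiai::AiModelMngerClient
      bool Init() { return true; }  // the real client initializes against the NPU runtime
    };

    std::shared_ptr<Client> CreateClient() {
      auto client = std::make_shared<Client>();
      if (client == nullptr) {      // mirrors the source's defensive check; note that
        return nullptr;             // std::make_shared reports failure by throwing, not by nullptr
      }
      if (!client->Init()) {        // constructed, but the runtime refused to initialize
        std::cerr << "client init failed." << std::endl;
        return nullptr;
      }
      return client;
    }
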
@@ -78,7 +78,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
 }
 int SubGraphNpuKernel::Run() {
-  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, out_kernels_, nodes_);
+  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, out_nodes_, nodes_);
 }
 int SubGraphNpuKernel::BuildNPUInputOp() {

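Passing out_nodes_ instead of out_kernels_ matters because NPUExecutor::Run (second hunk) repacks exactly those tensors that belong to the subgraph's output nodes. A sketch of how such a trans_tensors list can be built, with stand-in Node/Tensor types (the real lite kernel classes differ):

    #include <algorithm>
    #include <vector>

    struct Tensor {};                                 // stand-ins for the lite types
    struct Node { std::vector<Tensor *> out_tensors; };

    // Collect the tensors produced by the subgraph's output nodes; an NPU output
    // found in this list is a graph output and needs NCHW -> NHWC repacking.
    std::vector<Tensor *> CollectTransTensors(const std::vector<Node *> &out_nodes) {
      std::vector<Tensor *> trans_tensors;
      for (const Node *node : out_nodes) {
        trans_tensors.insert(trans_tensors.end(),
                             node->out_tensors.begin(), node->out_tensors.end());
      }
      return trans_tensors;
    }

    bool NeedsRepack(const std::vector<Tensor *> &trans_tensors, Tensor *t) {
      return std::find(trans_tensors.begin(), trans_tensors.end(), t) != trans_tensors.end();
    }
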
@@ -0,0 +1,3 @@
+mobilenet_v1_1.0_224.tflite 3
+squeezenet.tflite 3
+inception_v3.tflite 3

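models_npu.cfg is a new config consumed by the benchmark loop added below: each line holds a model file and an accuracy limit, separated by whitespace. A sketch of the same two-field parse in C++ (interpreting the limit as the --accuracyThreshold value handed to ./benchmark is an assumption):

    #include <fstream>
    #include <sstream>
    #include <string>

    int main() {
      // Each cfg line is "<model file> <accuracy limit>", e.g. "squeezenet.tflite 3".
      std::ifstream cfg("models_npu.cfg");
      std::string line;
      while (std::getline(cfg, line)) {
        std::istringstream fields(line);
        std::string model_name;
        double accuracy_limit = 0.0;
        if (fields >> model_name >> accuracy_limit) {
          // hand model_name / accuracy_limit to the benchmark invocation ...
        }
      }
      return 0;
    }
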
@@ -1314,6 +1314,22 @@ function Run_arm64() {
         fi
     done < ${models_mindspore_weightquant_config}
+
+    # Run npu converted models:
+    while read line; do
+        model_name=`echo ${line} | awk -F ' ' '{print $1}'`
+        accuracy_limit=`echo ${line} | awk -F ' ' '{print $2}'`
+        echo "mindspore run npu: ${model_name}, accuracy limit: ${accuracy_limit}" >> "${run_arm64_log_file}"
+        echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=NPU --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> "${run_arm64_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=NPU --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> adb_run_cmd.txt
+        adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
+        if [ $? = 0 ]; then
+            run_result='arm64_npu: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
+        else
+            run_result='arm64_npu: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
+        fi
+    done < ${models_npu_config}
     # Run converted models which have several inputs or whose accuracy need not be checked:
     while read line; do
         model_name=${line%%;*}
@@ -1492,6 +1508,7 @@ models_gpu_fp16_config=${basepath}/models_gpu_fp16.cfg
 models_gpu_weightquant_config=${basepath}/models_gpu_weightquant.cfg
 models_mindspore_weightquant_config=${basepath}/models_mindspore_weightquant.cfg
 models_arm32_config=${basepath}/models_arm32.cfg
+models_npu_config=${basepath}/models_npu.cfg
 models_compatibility_config=${basepath}/models_compatibility.cfg
 models_only_for_process_config=${basepath}/models_with_several_inputs_or_without_outputs.cfg
