From e43005242c7e03622a9d025c3b20ad3ab4aeef7c Mon Sep 17 00:00:00 2001 From: zhaozhenlong Date: Fri, 19 Mar 2021 09:30:14 +0800 Subject: [PATCH] concat output fix npu pass --- .../agent/npu/optimizer/npu_fusion_pass.cc | 17 +++++++++++++++++ .../npu/optimizer/npu_insert_transform_pass.cc | 7 ++++++- .../agent/npu/optimizer/npu_pass_utils.cc | 3 +++ .../agent/npu/optimizer/npu_transform_pass.cc | 4 ++-- mindspore/lite/test/models_npu.cfg | 1 + 5 files changed, 29 insertions(+), 3 deletions(-) diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc index 42777037ad..175154b311 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc @@ -141,6 +141,23 @@ void UpdatePreTensors(kernel::LiteKernel *cur_kernel) { void UpdatePostTensors(kernel::LiteKernel *cur_kernel) { auto tensor = cur_kernel->out_tensors()[0]; + + // in case: node->nh2nc->nc2nh(graph output) --->>> node->nc2nh, node out_tensor should be put to nc2nh out tensors + auto out_kernels = cur_kernel->out_kernels(); + if (out_kernels.size() == 1 && out_kernels[0]->out_kernels().size() == 1 && + out_kernels[0]->out_kernels()[0]->out_kernels().empty() && + out_kernels[0]->out_kernels()[0]->type_str() == "Transpose") { + auto nc_tensor = out_kernels[0]->out_tensors()[0]; // nh2nc's out tensor + cur_kernel->set_out_tensors({nc_tensor}); + auto post_post_kernel = out_kernels[0]->out_kernels()[0]; + // nc2nh kernel set in_tensor out_tensor + auto post_post_k_in_tensors = post_post_kernel->in_tensors(); + post_post_k_in_tensors[0] = nc_tensor; + post_post_kernel->set_in_tensors(post_post_k_in_tensors); + post_post_kernel->set_out_tensors({tensor}); + return; + } + tensor->set_format(schema::Format_NCHW); auto nhwc_shape = tensor->shape(); tensor->set_shape({nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}); diff --git 
a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc index ea86fd5981..9ca39be2c4 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc @@ -14,6 +14,7 @@ * limitations under the License. */ #include "src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h" +#include <algorithm> #include #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" @@ -51,7 +52,8 @@ int GetInsertState(kernel::LiteKernel *kernel) { // current kernel is target kernel // use out kernels to count how many out lines from current kernel - size_t in_out_tensor_num = kernel->in_tensors().size() + kernel->out_kernels().size(); + size_t in_out_tensor_num = + kernel->in_tensors().size() + std::max(kernel->out_kernels().size(), static_cast<size_t>(1)); size_t transpose_input_num = 0; size_t transpose_output_num = 0; bool need_pre_insert = false; @@ -65,6 +67,9 @@ int GetInsertState(kernel::LiteKernel *kernel) { need_pre_insert = true; } } + if (kernel->out_kernels().empty()) { + need_post_insert = true; + } for (const auto out_kernel : kernel->out_kernels()) { if (NPUPassUtils::IsNhwc2Nchw(out_kernel)) { transpose_output_num++; diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc index 7941f5103f..802d693b77 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc @@ -106,6 +106,9 @@ void NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, break; } } + if (out_kernels.empty()) { + out_kernels.push_back(trans_kernel); + } pre_kernel->set_out_kernels(out_kernels); } diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc 
b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc index 3e7e907e42..870df98b9d 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc @@ -156,8 +156,8 @@ int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vectorin_tensors(), post_trans_kernel->out_tensors()); diff --git a/mindspore/lite/test/models_npu.cfg b/mindspore/lite/test/models_npu.cfg index 62e2383566..def581e3fb 100644 --- a/mindspore/lite/test/models_npu.cfg +++ b/mindspore/lite/test/models_npu.cfg @@ -70,3 +70,4 @@ ml_video_edit_v10_best_model_nomean_20200723 8 ml_edu_kit_hand_key_position.onnx 2 #ml_video_edit_oneclick_adaptis.pb #too many subgraphs densenet.tflite 3 +resnet_v2_101_299.tflite 1