diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.cc deleted file mode 100644 index 349703c4af..0000000000 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.cc +++ /dev/null @@ -1,126 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h" -#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" -namespace mindspore::lite { -using kernel::KERNEL_ARCH::kNPU; -int NPUAddTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *after_kernel) { - std::vector out_kernels; - - for (auto out_kernel : kernel->out_kernels()) { - if (out_kernel == after_kernel) { - out_kernels.push_back(trans_kernel); - } else { - out_kernels.push_back(out_kernel); - } - } - NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors()); - return RET_OK; -} - -int NPUAddTransformPass::InsertNode(const InnerContext *context, std::vector::iterator it, - std::vector *all_kernels, - std::vector *all_tensors) { - auto kernel = *it; - for (auto out_kernel : kernel->out_kernels()) { - if (out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) { - continue; - } - - std::vector nh2nc_shape = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[3], - kernel->out_tensors()[0]->shape()[1], kernel->out_tensors()[0]->shape()[2]}; - auto nh2nc_tensor = - new Tensor(kernel->out_tensors()[0]->data_type(), nh2nc_shape, schema::Format_NHWC, Tensor::VAR); - std::vector nh2nc_tensors = {nh2nc_tensor}; - all_tensors->push_back(nh2nc_tensors[0]); - - auto nc2nh_shape = {nh2nc_shape[0], nh2nc_shape[2], nh2nc_shape[3], nh2nc_shape[1]}; - auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nc2nh_shape, schema::Format_NCHW, Tensor::VAR); - std::vector nc2nh_tensors = {nc2nh_tensor}; - all_tensors->push_back(nc2nh_tensors[0]); - - auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++); - auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); - all_kernels->push_back(nh2nc_kernel); - insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); - auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++); - auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); - all_kernels->push_back(nc2nh_kernel); - insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); - NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors); - NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors); - UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel); - UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, 
out_kernel); - } - return RET_OK; -} - -int NPUAddTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *next_kernel) { - std::vector next_in_tensors; - for (auto next_in_tensor : next_kernel->in_tensors()) { - if (next_in_tensor != kernel->out_tensors()[0]) { - next_in_tensors.push_back(next_in_tensor); - } else { - next_in_tensors.push_back(trans_kernel->out_tensors()[0]); - } - } - next_kernel->set_in_tensors(next_in_tensors); - std::vector next_in_kernels; - for (auto in_kernel : next_kernel->in_kernels()) { - if (in_kernel == kernel) { - next_in_kernels.push_back(trans_kernel); - } else { - next_in_kernels.push_back(in_kernel); - } - } - NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors, - next_kernel->out_tensors()); - return RET_OK; -} - -int NPUAddTransformPass::Run() { - if (context_->IsNpuEnabled()) { - std::vector new_kernels; - - for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) { - auto kernel = *it; - new_kernels.push_back(kernel); - if (kernel->desc().arch != kNPU) { - continue; - } - if (kernel->Type() == schema::PrimitiveType_Add && kernel->out_kernels().size() >= 2) { - int sum = 0; - for (auto i : kernel->out_kernels()) { - if (i->Type() == schema::PrimitiveType_Nhwc2Nchw) { - sum++; - } - } - if (kernel->out_kernels().size() != sum) { - InsertNode(context_, it, &new_kernels, all_tensors_); - } - } - } - - all_kernels_->clear(); - for (int i = 0; i < new_kernels.size(); i++) { - all_kernels_->push_back(new_kernels[i]); - } - } - return RET_OK; -} -} // namespace mindspore::lite diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.h deleted file mode 100644 index e77f3e93ef..0000000000 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.h +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
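For reference, the shape bookkeeping in the deleted pass above (and in the passes that replace it) is a fixed axis permutation: the Nhwc2Nchw output tensor reuses the NHWC dims reordered to {N, C, H, W}, and the Nchw2Nhwc output inverts that. A minimal standalone sketch of the same round trip, using plain `std::vector<int>` instead of the MindSpore tensor types (helper names are illustrative, not from the codebase):

```cpp
#include <cassert>
#include <vector>

// NHWC {N,H,W,C} -> NCHW {N,C,H,W}; mirrors the nh2nc_shape construction above.
std::vector<int> Nhwc2NchwShape(const std::vector<int> &nhwc) {
  assert(nhwc.size() == 4);
  return {nhwc[0], nhwc[3], nhwc[1], nhwc[2]};
}

// NCHW {N,C,H,W} -> NHWC {N,H,W,C}; mirrors the nc2nh_shape construction above.
std::vector<int> Nchw2NhwcShape(const std::vector<int> &nchw) {
  assert(nchw.size() == 4);
  return {nchw[0], nchw[2], nchw[3], nchw[1]};
}

int main() {
  std::vector<int> nhwc = {1, 224, 224, 3};
  auto nchw = Nhwc2NchwShape(nhwc);      // {1, 3, 224, 224}
  assert(Nchw2NhwcShape(nchw) == nhwc);  // the two transposes cancel out
  return 0;
}
```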
- */ - -#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_ -#include -#include "src/lite_kernel.h" -#include "src/ops/primitive_c.h" -#include "src/runtime/agent/npu/optimizer/npu_base_pass.h" -namespace mindspore::lite { -class NPUAddTransformPass : public NPUBasePass { - public: - explicit NPUAddTransformPass(const InnerContext *context, std::vector *all_kernels, - std::vector *all_tensors) { - context_ = context; - all_kernels_ = all_kernels; - all_tensors_ = all_tensors; - name_ = "NPUConcatTransformPass"; - } - ~NPUAddTransformPass() override { - for (auto primitive : insert_primitive_) { - delete primitive; - } - insert_primitive_.clear(); - } - int Run() override; - - private: - int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *after_kernel); - - int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *next_kernel); - - int InsertNode(const InnerContext *context, std::vector::iterator it, - std::vector *all_kernels, std::vector *all_tensors); - - private: - int total = 0; - const InnerContext *context_; - std::vector *all_kernels_; - std::vector insert_primitive_; - std::vector *all_tensors_; -}; -} // namespace mindspore::lite -#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_ diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.cc deleted file mode 100644 index 8eb3239961..0000000000 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.cc +++ /dev/null @@ -1,126 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
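The deleted add and concat passes shared one trigger: insert a transpose pair only when the neighbours of an NPU Add (its consumers) or Concat (its producers) are not already format-conversion nodes. A hedged sketch of that predicate on a toy kernel struct (types and names are illustrative, not the real `kernel::LiteKernel` API):

```cpp
#include <algorithm>
#include <vector>

enum class OpType { Add, Concat, Nhwc2Nchw, Nchw2Nhwc, Other };

struct ToyKernel {
  OpType type;
  std::vector<ToyKernel *> in_kernels;
  std::vector<ToyKernel *> out_kernels;
};

// True when at least one consumer of an Add still expects NHWC data,
// i.e. the outputs are not all Nhwc2Nchw nodes -- the condition under
// which the old NPUAddTransformPass::Run() called InsertNode().
bool NeedsPostTranspose(const ToyKernel &k) {
  return !std::all_of(k.out_kernels.begin(), k.out_kernels.end(),
                      [](const ToyKernel *out) { return out->type == OpType::Nhwc2Nchw; });
}

// The concat variant checked the producers instead.
bool NeedsPreTranspose(const ToyKernel &k) {
  return !std::all_of(k.in_kernels.begin(), k.in_kernels.end(),
                      [](const ToyKernel *in) { return in->type == OpType::Nchw2Nhwc; });
}
```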
- */ -#include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h" -#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" -namespace mindspore::lite { -using kernel::KERNEL_ARCH::kNPU; -int NPUConcatTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *after_kernel) { - std::vector out_kernels; - for (auto out_kernel : kernel->out_kernels()) { - if (out_kernel == after_kernel) { - out_kernels.push_back(trans_kernel); - } else { - out_kernels.push_back(out_kernel); - } - } - NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors()); - return RET_OK; -} - -int NPUConcatTransformPass::InsertNode(const InnerContext *context, std::vector::iterator it, - std::vector *all_kernels, - std::vector *all_tensors) { - for (auto kernel : (*it)->in_kernels()) { - if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) { - continue; - } - auto out_kernel = (*it); - std::vector nh2nc_shape = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[3], - kernel->out_tensors()[0]->shape()[1], kernel->out_tensors()[0]->shape()[2]}; - auto nh2nc_tensor = - new Tensor(kernel->out_tensors()[0]->data_type(), nh2nc_shape, schema::Format_NHWC, Tensor::VAR); - std::vector nh2nc_tensors = {nh2nc_tensor}; - all_tensors->push_back(nh2nc_tensors[0]); - - auto nc2nh_shape = {nh2nc_shape[0], nh2nc_shape[2], nh2nc_shape[3], nh2nc_shape[1]}; - auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nc2nh_shape, schema::Format_NCHW, Tensor::VAR); - std::vector nc2nh_tensors = {nc2nh_tensor}; - all_tensors->push_back(nc2nh_tensors[0]); - - auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++); - auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); - all_kernels->push_back(nh2nc_kernel); - insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); - auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++); - auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); - all_kernels->push_back(nc2nh_kernel); - insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); - NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors); - NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors); - UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel); - UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, out_kernel); - } - return RET_OK; -} - -int NPUConcatTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, - kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *next_kernel) { - std::vector next_in_tensors; - for (auto next_in_tensor : next_kernel->in_tensors()) { - if (next_in_tensor != kernel->out_tensors()[0]) { - next_in_tensors.push_back(next_in_tensor); - } else { - next_in_tensors.push_back(trans_kernel->out_tensors()[0]); - } - } - next_kernel->set_in_tensors(next_in_tensors); - std::vector next_in_kernels; - for (auto in_kernel : next_kernel->in_kernels()) { - if (in_kernel == kernel) { - next_in_kernels.push_back(trans_kernel); - } else { - next_in_kernels.push_back(in_kernel); - } - } - NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors, - next_kernel->out_tensors()); - return RET_OK; -} - -int NPUConcatTransformPass::Run() { - if 
(context_->IsNpuEnabled()) { - std::vector<kernel::LiteKernel *> new_kernels; - - for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) { - auto kernel = *it; - if (kernel->desc().arch != kNPU) { - new_kernels.push_back(kernel); - continue; - } - if (kernel->Type() == schema::PrimitiveType_Concat && kernel->in_kernels().size() >= 2) { - int sum = 0; - for (auto i : kernel->in_kernels()) { - if (i->Type() == schema::PrimitiveType_Nchw2Nhwc) { - sum++; - } - } - if (kernel->out_kernels().size() != sum) { - InsertNode(context_, it, &new_kernels, all_tensors_); - } - } - new_kernels.push_back(kernel); - } - - all_kernels_->clear(); - for (int i = 0; i < new_kernels.size(); i++) { - all_kernels_->push_back(new_kernels[i]); - } - } - return RET_OK; -} -} // namespace mindspore::lite diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc index 10ecb4593f..d6dfc3c2ea 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc @@ -34,6 +34,28 @@ bool CheckFusion(kernel::LiteKernel *kernel) { return post_flag; } +bool CheckFormatFusion(kernel::LiteKernel *kernel) { + if (kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) { + return std::all_of( + kernel->out_kernels().begin(), kernel->out_kernels().end(), + [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc; }); + } + if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) { + return std::all_of( + kernel->out_kernels().begin(), kernel->out_kernels().end(), + [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; }); + } + return false; +} + +void NPUFusionPass::RemoveAndFreeKernel(kernel::LiteKernel *cur_kernel) { + auto itr = find(kernels->begin(), kernels->end(), cur_kernel); + if (itr != kernels->end()) { + kernels->erase(itr); + } + delete cur_kernel; +} + void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) { for (auto in_kernel : cur_kernel->in_kernels()) { auto pre_kernel = in_kernel->in_kernels()[0]; @@ -55,6 +77,7 @@ void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) { } } cur_kernel->set_in_kernels(cur_in_kernels); + RemoveAndFreeKernel(in_kernel); } } @@ -79,6 +102,7 @@ void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) { } } cur_kernel->set_out_kernels(cur_out_kernels); + RemoveAndFreeKernel(out_kernel); } } @@ -163,34 +187,52 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) { if (kernel->out_kernels().empty()) { return RET_OK; } - if (!std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) { - return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; - })) { + if (!CheckFormatFusion(kernel)) { return RET_OK; } - auto pre_kernel = kernel->in_kernels()[0]; - auto pre_out_kernels = pre_kernel->out_kernels(); - for (size_t i = 0; i < pre_out_kernels.size(); i++) { - if (pre_out_kernels[i] == kernel) { - pre_out_kernels.erase(pre_out_kernels.begin() + i); - break; - } + auto pre_kernel = kernel->in_kernels()[0]; + auto in_tensor = kernel->in_tensors()[0]; + auto out_tensor = kernel->out_tensors()[0]; + auto tensor_itr = std::find(pre_kernel->out_tensors().begin(), pre_kernel->out_tensors().end(), in_tensor); + if (tensor_itr != pre_kernel->out_tensors().end()) { + in_tensor = *tensor_itr; + } else { + MS_LOG(ERROR) << "Can't find the connected tensor between kernel " << kernel->name() << " and its pre_kernel."; + return RET_ERROR; } - for (const auto &nc2nh : kernel->out_kernels()) { - for (const auto &post_kernel : nc2nh->out_kernels()) { + + std::vector<kernel::LiteKernel *> pre_insert_kernels; + for (const auto &trans_kernel : kernel->out_kernels()) { + for (const auto &post_kernel : trans_kernel->out_kernels()) { + // update tensor + auto tensors_vec = post_kernel->in_tensors(); + for (size_t i = 0; i < tensors_vec.size(); i++) { + if (tensors_vec[i] == out_tensor) { + tensors_vec[i] = in_tensor; + break; + } + } + post_kernel->set_in_tensors(tensors_vec); + + // update kernel auto post_in_kernels = post_kernel->in_kernels(); for (size_t i = 0; i < post_in_kernels.size(); i++) { - if (post_in_kernels[i] == nc2nh) { + if (post_in_kernels[i] == trans_kernel) { post_in_kernels[i] = pre_kernel; break; } } post_kernel->set_in_kernels(post_in_kernels); - pre_out_kernels.push_back(post_kernel); + pre_insert_kernels.push_back(post_kernel); } + // Free the trans kernel only after the loop over its out_kernels is done. + RemoveAndFreeKernel(trans_kernel); } - pre_kernel->set_out_kernels(pre_out_kernels); + auto pre_out_kernels = pre_kernel->out_kernels(); + auto itr = find(pre_out_kernels.begin(), pre_out_kernels.end(), kernel); + pre_out_kernels.insert(itr, pre_insert_kernels.begin(), pre_insert_kernels.end()); + pre_kernel->set_out_kernels(pre_out_kernels); + RemoveAndFreeKernel(kernel); return RET_OK; } @@ -201,6 +243,7 @@ int NPUFusionPass::Run() { ConcatFusion(kernel); continue; case schema::PrimitiveType_Add: + case schema::PrimitiveType_Activation: AddFusion(kernel); continue; case schema::PrimitiveType_Nchw2Nhwc: diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.h index f31ff54a64..ede9818749 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.h +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.h @@ -33,11 +33,12 @@ class NPUFusionPass : public NPUBasePass { int Run() override; protected: + void RemoveAndFreeKernel(kernel::LiteKernel *cur_kernel); + void UpdatePreKernels(kernel::LiteKernel *kernel); + void UpdatePostKernels(kernel::LiteKernel *kernel); int ConcatFusion(kernel::LiteKernel *kernel); int AddFusion(kernel::LiteKernel *kernel); int FormatFusion(kernel::LiteKernel *kernel); - void UpdatePreKernels(kernel::LiteKernel *kernel); - void UpdatePostKernels(kernel::LiteKernel *kernel); private: std::vector<kernel::LiteKernel *> *kernels; diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc new file mode 100644 index 0000000000..8529f11c50 --- /dev/null +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc @@ -0,0 +1,139 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#include "src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h" +#include +#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" + +namespace mindspore::lite { +using kernel::KERNEL_ARCH::kNPU; +enum InsertState { InsertNone, PreInsert, PostInsert }; + +std::set npu_insert_nodes = {schema::PrimitiveType_Concat, schema::PrimitiveType_Add}; + +int GetInsertState(kernel::LiteKernel *kernel) { + if (npu_insert_nodes.find(kernel->Type()) == npu_insert_nodes.end()) { + return InsertNone; + } + auto pre_flag = + std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), + [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc; }); + auto post_flag = + std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), + [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; }); + if (pre_flag && !post_flag) { + return PostInsert; + } + if (!pre_flag && post_flag) { + return PreInsert; + } + return InsertNone; +} + +int NPUInsertTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKernel *cur_kernel, + std::vector *all_kernels, + std::vector *all_tensors) { + for (auto kernel : cur_kernel->in_kernels()) { + if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) { + continue; + } + auto nhwc_shape = cur_kernel->out_tensors()[0]->shape(); + std::vector nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; + auto nh2nc_tensor = new Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); + std::vector nh2nc_tensors = {nh2nc_tensor}; + all_tensors->push_back(nh2nc_tensors[0]); + + auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); + std::vector nc2nh_tensors = {nc2nh_tensor}; + all_tensors->push_back(nc2nh_tensors[0]); + + auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++); + auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); + all_kernels->push_back(nh2nc_kernel); + insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); + auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++); + auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); + all_kernels->push_back(nc2nh_kernel); + insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); + NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors); + NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {cur_kernel}, nh2nc_tensors, nc2nh_tensors); + NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, cur_kernel); + NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, cur_kernel); + } + return RET_OK; +} + +int NPUInsertTransformPass::InsertPostNode(const InnerContext *context, kernel::LiteKernel *cur_kernel, + std::vector *all_kernels, + std::vector *all_tensors) { + for (auto out_kernel : cur_kernel->out_kernels()) { + if (out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) { + continue; + } + auto nhwc_shape = cur_kernel->out_tensors()[0]->shape(); + std::vector nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; + + auto nh2nc_tensor = + new Tensor(cur_kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); + std::vector nh2nc_tensors = {nh2nc_tensor}; + all_tensors->push_back(nh2nc_tensors[0]); + + auto nc2nh_tensor = new 
Tensor(nh2nc_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); + std::vector nc2nh_tensors = {nc2nh_tensor}; + all_tensors->push_back(nc2nh_tensors[0]); + + auto nh2nc_name = cur_kernel->name() + "_nh2nc_" + std::to_string(total++); + auto *nh2nc_kernel = + NPUPassUtils::CreateNhwc2NchwKernel(cur_kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); + all_kernels->push_back(nh2nc_kernel); + insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); + auto nc2nh_name = cur_kernel->name() + "_nc2nh_" + std::to_string(total++); + auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); + all_kernels->push_back(nc2nh_kernel); + insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); + NPUPassUtils::UpdateKernel(nh2nc_kernel, {cur_kernel}, {nc2nh_kernel}, cur_kernel->out_tensors(), nh2nc_tensors); + NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors); + NPUPassUtils::UpdateNH2NCTransNodePreKernel(cur_kernel, nh2nc_kernel, out_kernel); + NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(cur_kernel, nc2nh_kernel, out_kernel); + } + return RET_OK; +} + +int NPUInsertTransformPass::Run() { + if (!context_->IsNpuEnabled()) { + return RET_OK; + } + for (size_t i = 0; i < all_kernels_->size(); i++) { + auto kernel = (*all_kernels_)[i]; + if (kernel->desc().arch != kNPU) { + continue; + } + auto insert_state = GetInsertState(kernel); + if (insert_state == PreInsert) { + std::vector pre_kernels; + InsertPreNode(context_, kernel, &pre_kernels, all_tensors_); + all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end()); + i += pre_kernels.size(); + } + if (insert_state == PostInsert) { + std::vector post_kernels; + InsertPostNode(context_, kernel, &post_kernels, all_tensors_); + all_kernels_->insert(all_kernels_->begin() + i + 1, post_kernels.begin(), post_kernels.end()); + i += post_kernels.size(); + } + } + return RET_OK; +} +} // namespace mindspore::lite diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h similarity index 60% rename from mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h rename to mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h index 50fa3846e5..0671a0a753 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h @@ -14,23 +14,25 @@ * limitations under the License. 
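NPUInsertTransformPass::Run() above patches the kernel list in place, so after splicing in the generated transpose kernels it must advance the loop index past them; otherwise it would revisit and re-process the nodes it just inserted. The indexing pattern, reduced to a toy example (illustrative types, not the pass itself):

```cpp
#include <vector>

// Insert `extra` before position i in place and return the index where the
// original element now lives, mirroring `i += pre_kernels.size()` in Run().
template <typename T>
size_t InsertBefore(std::vector<T> &v, size_t i, const std::vector<T> &extra) {
  v.insert(v.begin() + i, extra.begin(), extra.end());
  return i + extra.size();
}

int main() {
  std::vector<int> kernels = {10, 20, 30};
  size_t i = 1;                                   // currently processing kernel "20"
  i = InsertBefore(kernels, i, {11, 12});         // pre-insert  -> {10, 11, 12, 20, 30}
  kernels.insert(kernels.begin() + i + 1, {21});  // post-insert -> {10, 11, 12, 20, 21, 30}
  return kernels[i] == 20 ? 0 : 1;                // index still points at the original kernel
}
```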
*/ -#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_INSERT_TRANSFORM_PASS_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_INSERT_TRANSFORM_PASS_H_ #include #include "src/lite_kernel.h" #include "src/ops/primitive_c.h" #include "src/runtime/agent/npu/optimizer/npu_base_pass.h" + namespace mindspore::lite { -class NPUConcatTransformPass : public NPUBasePass { +class NPUInsertTransformPass : public NPUBasePass { public: - explicit NPUConcatTransformPass(const InnerContext *context, std::vector *all_kernels, + explicit NPUInsertTransformPass(const InnerContext *context, std::vector *all_kernels, std::vector *all_tensors) { context_ = context; all_kernels_ = all_kernels; all_tensors_ = all_tensors; - name_ = "NPUConcatTransformPass"; + name_ = "NPUInsertTransformPass"; } - ~NPUConcatTransformPass() override { + + ~NPUInsertTransformPass() override { for (auto primitive : insert_primitive_) { delete primitive; } @@ -39,14 +41,11 @@ class NPUConcatTransformPass : public NPUBasePass { int Run() override; private: - int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *after_kernel); - - int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *next_kernel); + int InsertPreNode(const InnerContext *context, kernel::LiteKernel *cur_kernel, + std::vector *all_kernels, std::vector *all_tensors); - int InsertNode(const InnerContext *context, std::vector::iterator it, - std::vector *all_kernels, std::vector *all_tensors); + int InsertPostNode(const InnerContext *context, kernel::LiteKernel *cur_kernel, + std::vector *all_kernels, std::vector *all_tensors); private: int total = 0; @@ -56,4 +55,4 @@ class NPUConcatTransformPass : public NPUBasePass { std::vector insert_primitive_; }; } // namespace mindspore::lite -#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_ +#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_INSERT_TRANSFORM_PASS_H_ diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc index 06fb85e6c6..b0992fccc9 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc @@ -99,4 +99,76 @@ void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vectorset_in_kernels(in_kernels); kernel->set_out_kernels(out_kernels); } + +void NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, + kernel::LiteKernel *after_kernel) { + std::vector out_kernels; + + for (auto out_kernel : kernel->out_kernels()) { + if (out_kernel == after_kernel) { + out_kernels.push_back(trans_kernel); + } else { + out_kernels.push_back(out_kernel); + } + } + UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors()); +} + +void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, + kernel::LiteKernel *next_kernel) { + std::vector cur_out_kernels; + for (auto out_kernel : kernel->out_kernels()) { + if (out_kernel == next_kernel) { + cur_out_kernels.push_back(trans_kernel); + } else { + cur_out_kernels.push_back(out_kernel); + } + } + 
auto kernel_out_tensor = kernel->out_tensors()[0]; + // Change format the output of the current kernel nhwc->nchw + auto nhwc_shape = kernel_out_tensor->shape(); + std::vector nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; + kernel_out_tensor->set_format(schema::Format_NCHW); + kernel_out_tensor->set_shape(nchw_shape); + UpdateKernel(kernel, kernel->in_kernels(), cur_out_kernels, kernel->in_tensors(), {kernel_out_tensor}); +} + +void NPUPassUtils::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, + kernel::LiteKernel *before_kernel) { + std::vector cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]}; + for (int i = 1; i < kernel->in_tensors().size(); i++) { + cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]); + } + std::vector cur_in_kernels = {trans_kernel}; + for (int i = 0; i < kernel->in_kernels().size(); i++) { + auto in_kernel = kernel->in_kernels()[i]; + if (in_kernel != kernel) { + cur_in_kernels.push_back(in_kernel); + } + } + UpdateKernel(kernel, cur_in_kernels, kernel->out_kernels(), cur_kernel_in_tensors, kernel->out_tensors()); +} + +void NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, + kernel::LiteKernel *next_kernel) { + std::vector next_in_tensors; + for (auto next_in_tensor : next_kernel->in_tensors()) { + if (next_in_tensor != kernel->out_tensors()[0]) { + next_in_tensors.push_back(next_in_tensor); + } else { + next_in_tensors.push_back(trans_kernel->out_tensors()[0]); + } + } + next_kernel->set_in_tensors(next_in_tensors); + std::vector next_in_kernels; + for (auto in_kernel : next_kernel->in_kernels()) { + if (in_kernel == kernel) { + next_in_kernels.push_back(trans_kernel); + } else { + next_in_kernels.push_back(in_kernel); + } + } + NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors, + next_kernel->out_tensors()); +} } // namespace mindspore::lite diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h index 2f843d1de0..b7ff59e8b2 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h @@ -35,6 +35,18 @@ class NPUPassUtils { const std::vector &out_kernels, const std::vector &in_tensors, const std::vector &out_tensors); + static void UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, + kernel::LiteKernel *after_kernel); + + static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, + kernel::LiteKernel *next_kernel); + + static void UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, + kernel::LiteKernel *before_kernel); + + static void UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, + kernel::LiteKernel *next_kernel); + private: static PrimitiveC *CreateNchw2NhwcPrimitive(); diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc index 918843c665..80ffe0930e 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc @@ -21,43 +21,9 @@ namespace mindspore::lite { using kernel::KERNEL_ARCH::kCPU; using kernel::KERNEL_ARCH::kNPU; -int 
NPUTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *after_kernel) { - std::vector out_kernels; - - for (auto out_kernel : kernel->out_kernels()) { - if (out_kernel == after_kernel) { - out_kernels.push_back(trans_kernel); - } else { - out_kernels.push_back(out_kernel); - } - } - NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors()); - return RET_OK; -} - -int NPUTransformPass::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *before_kernel) { - std::vector cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]}; - for (int i = 1; i < kernel->in_tensors().size(); i++) { - cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]); - } - std::vector cur_in_kernels = {trans_kernel}; - for (int i = 0; i < kernel->in_kernels().size(); i++) { - auto in_kernel = kernel->in_kernels()[i]; - if (in_kernel != kernel) { - cur_in_kernels.push_back(in_kernel); - } - } - NPUPassUtils::UpdateKernel(kernel, cur_in_kernels, kernel->out_kernels(), cur_kernel_in_tensors, - kernel->out_tensors()); - return RET_OK; -} - -int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector::iterator it, +int NPUTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, std::vector *all_kernels, std::vector *all_tensors) { - auto kernel = *it; bool is_input_kernel = kernel->in_kernels().empty(); if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU || npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) { @@ -66,9 +32,9 @@ int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vectorin_kernels()[0]; } // Create pre transform kernel out tensors. 
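InsertPreNode above only materializes a Nhwc2Nchw kernel when the producer cannot already hand over NCHW data: graph inputs, non-NPU producers, and producers outside npu_trans_nodes all need the extra transpose. The guard as a boolean sketch (toy types; the real check reads `kernel->in_kernels()[0]->desc().arch` and `npu_trans_nodes`):

```cpp
#include <set>

enum class Arch { CPU, NPU };
enum class Op { Conv2D, DeConv2D, Pooling, Other };

// Ops whose NPU kernels already consume/produce NCHW (illustrative subset).
const std::set<Op> kNpuTransNodes = {Op::Conv2D, Op::DeConv2D, Op::Pooling};

struct Producer { Arch arch; Op op; };

// True when a pre-transpose must be inserted in front of the current kernel;
// nullptr stands for a graph input with no producer at all.
bool NeedPreTranspose(const Producer *p) {
  return p == nullptr || p->arch != Arch::NPU ||
         kNpuTransNodes.find(p->op) == kNpuTransNodes.end();
}
```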
- std::vector shapes{kernel->in_tensors()[0]->shape()[0], kernel->in_tensors()[0]->shape()[3], - kernel->in_tensors()[0]->shape()[1], kernel->in_tensors()[0]->shape()[2]}; - auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), shapes, schema::Format_NCHW, Tensor::VAR); + auto nhwc_shape = kernel->in_tensors()[0]->shape(); + std::vector nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; + auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR); std::vector pre_trans_out_tensors = {tensor}; all_tensors->push_back(pre_trans_out_tensors[0]); // Replace the output tensor of the previous node @@ -89,17 +55,16 @@ int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector::iterator it, +int NPUTransformPass::InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, std::vector *all_kernels, std::vector *all_tensors) { - auto kernel = *it; // Model output does not insert operator if (kernel->out_kernels().empty()) { return RET_OK; @@ -111,9 +76,8 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vectornchw - auto shapes = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[1], - kernel->out_tensors()[0]->shape()[2], kernel->out_tensors()[0]->shape()[3]}; - auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), shapes, schema::Format_NHWC, Tensor::VAR); + auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), kernel->out_tensors()[0]->shape(), + schema::Format_NHWC, Tensor::VAR); std::vector post_trans_out_tensors = {tensor}; all_tensors->push_back(post_trans_out_tensors[0]); // Use the output tensor of the current node as the input tensor of the post-conversion operator @@ -126,81 +90,32 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vectorGetPrimitive()); // Directly insert in the back, will not affect the topological sort all_kernels->push_back(post_trans_kernel); - UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel); - UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, next_kernel); - } - return RET_OK; -} - -int NPUTransformPass::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *next_kernel) { - std::vector cur_out_kernels; - for (auto out_kernel : kernel->out_kernels()) { - if (out_kernel == next_kernel) { - cur_out_kernels.push_back(trans_kernel); - } else { - cur_out_kernels.push_back(out_kernel); - } + NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel); + NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, next_kernel); } - auto kernel_out_tensor = kernel->out_tensors()[0]; - // Change format the output of the current kernel nhwc->nchw - std::vector kernel_out_new_shapes = {kernel_out_tensor->shape()[0], kernel_out_tensor->shape()[3], - kernel_out_tensor->shape()[1], kernel_out_tensor->shape()[2]}; - kernel_out_tensor->set_format(schema::Format_NCHW); - kernel_out_tensor->set_shape(kernel_out_new_shapes); - NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), cur_out_kernels, kernel->in_tensors(), {kernel_out_tensor}); return RET_OK; } -int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *next_kernel) { - std::vector next_in_tensors; - for (auto next_in_tensor : next_kernel->in_tensors()) { - if (next_in_tensor != kernel->out_tensors()[0]) { - 
next_in_tensors.push_back(next_in_tensor); - } else { - next_in_tensors.push_back(trans_kernel->out_tensors()[0]); - } +int NPUTransformPass::Run() { + if (!context_->IsNpuEnabled()) { + return RET_OK; } - next_kernel->set_in_tensors(next_in_tensors); - std::vector next_in_kernels; - for (auto in_kernel : next_kernel->in_kernels()) { - if (in_kernel == kernel) { - next_in_kernels.push_back(trans_kernel); - } else { - next_in_kernels.push_back(in_kernel); + for (size_t i = 0; i < all_kernels_->size();) { + auto kernel = (*all_kernels_)[i]; + if (kernel->desc().arch != kNPU || npu_trans_nodes.find(kernel->Type()) == npu_trans_nodes.end()) { + i++; + continue; } - } - NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors, - next_kernel->out_tensors()); + std::vector pre_kernels; + InsertPreNode(context_, kernel, &pre_kernels, all_tensors_); + all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end()); + i += (pre_kernels.size() + 1); - return RET_OK; -} - -int NPUTransformPass::Run() { - if (context_->IsNpuEnabled()) { - std::vector new_kernels; - - for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) { - auto kernel = *it; - if (kernel->desc().arch != kNPU) { - new_kernels.push_back(kernel); - continue; - } - if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) { - InsertPreNode(context_, it, &new_kernels, all_tensors_); - new_kernels.push_back(kernel); - InsertPostNode(context_, it, &new_kernels, all_tensors_); - } else { - new_kernels.push_back(kernel); - } - } - all_kernels_->clear(); - for (int i = 0; i < new_kernels.size(); i++) { - all_kernels_->push_back(new_kernels[i]); - } + std::vector post_kernels; + InsertPostNode(context_, kernel, &post_kernels, all_tensors_); + all_kernels_->insert(all_kernels_->begin() + i, post_kernels.begin(), post_kernels.end()); + i += post_kernels.size(); } return RET_OK; } - } // namespace mindspore::lite diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.h index 09dd562631..8f6115e00d 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.h +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.h @@ -20,6 +20,7 @@ #include "src/lite_kernel.h" #include "src/ops/primitive_c.h" #include "src/runtime/agent/npu/optimizer/npu_base_pass.h" + namespace mindspore::lite { class NPUTransformPass : public NPUBasePass { public: @@ -32,6 +33,7 @@ class NPUTransformPass : public NPUBasePass { all_tensors_ = all_tensors; name_ = "NPUTransformPass"; } + ~NPUTransformPass() override { for (auto primitive : insert_primitive_) { delete primitive; @@ -40,22 +42,10 @@ class NPUTransformPass : public NPUBasePass { } private: - int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *after_kernel); - - int UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *before_kernel); - - int UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *after_kernel); - - int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, - kernel::LiteKernel *next_kernel); - - int InsertPreNode(const InnerContext *context, std::vector::iterator it, + int InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, std::vector 
*all_kernels, std::vector *all_tensors); - int InsertPostNode(const InnerContext *context, std::vector::iterator it, + int InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, std::vector *all_kernels, std::vector *all_tensors); private: diff --git a/mindspore/lite/src/runtime/kernel/npu/activation.cc b/mindspore/lite/src/runtime/kernel/npu/activation_npu.cc similarity index 96% rename from mindspore/lite/src/runtime/kernel/npu/activation.cc rename to mindspore/lite/src/runtime/kernel/npu/activation_npu.cc index 13d8d0084f..c12ae00244 100644 --- a/mindspore/lite/src/runtime/kernel/npu/activation.cc +++ b/mindspore/lite/src/runtime/kernel/npu/activation_npu.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "src/runtime/kernel/npu/activation.h" +#include "src/runtime/kernel/npu/activation_npu.h" #include "include/graph/op/all_ops.h" #include "src/kernel_registry.h" @@ -37,7 +37,7 @@ int ActivationNPUKernel::IsSupport(const std::vector &inputs, int ActivationNPUKernel::SetNPUInputs(const std::vector &inputs, const std::vector &outputs, const std::vector &npu_inputs) { - act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act"); + act_ = new (std::nothrow) hiai::op::Activation(name_); if (act_ == nullptr) { MS_LOG(ERROR) << "New activation npu operator for activation op " << name_ << " failed."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/npu/activation.h b/mindspore/lite/src/runtime/kernel/npu/activation_npu.h similarity index 100% rename from mindspore/lite/src/runtime/kernel/npu/activation.h rename to mindspore/lite/src/runtime/kernel/npu/activation_npu.h diff --git a/mindspore/lite/src/runtime/kernel/npu/batchnorm_npu.cc b/mindspore/lite/src/runtime/kernel/npu/batchnorm_npu.cc new file mode 100644 index 0000000000..726296a090 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/npu/batchnorm_npu.cc @@ -0,0 +1,94 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/runtime/kernel/npu/batchnorm_npu.h" +#include "include/graph/op/all_ops.h" +#include "src/kernel_registry.h" +#include "src/runtime/agent/npu/npu_converter_utils.h" + +using mindspore::kernel::KERNEL_ARCH::kNPU; +using mindspore::lite::KernelRegistrar; +using mindspore::schema::PrimitiveType_FusedBatchNorm; + +namespace mindspore::kernel { +int BatchnormNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, + OpParameter *opParameter) { + return RET_OK; +} + +int BatchnormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, + const std::vector<lite::Tensor *> &outputs, + const std::vector<ge::Operator *> &npu_inputs) { + batchnorm_ = new (std::nothrow) ge::op::BatchNormExt2(name_); + if (batchnorm_ == nullptr) { + MS_LOG(ERROR) << "New batchnorm npu operator for batchnorm op " << name_ << " failed."; + return RET_ERROR; + } + batchnorm_->set_input_x(*npu_inputs[0]); + + auto scale = new (std::nothrow) hiai::op::Const(name_ + "_scale"); + if (scale == nullptr) { + MS_LOG(ERROR) << "New scale const failed."; + return RET_ERROR; + } + auto scale_tensor = mindspore::lite::ConverterToNPUTensor(inputs[1]); + scale->set_attr_value(scale_tensor); + batchnorm_->set_input_scale(*scale); + + auto offset = new (std::nothrow) hiai::op::Const(name_ + "_offset"); + if (offset == nullptr) { + MS_LOG(ERROR) << "New offset const failed."; + return RET_ERROR; + } + auto offset_tensor = mindspore::lite::ConverterToNPUTensor(inputs[2]); + offset->set_attr_value(offset_tensor); + batchnorm_->set_input_offset(*offset); + + auto mean = new (std::nothrow) hiai::op::Const(name_ + "_mean"); + if (mean == nullptr) { + MS_LOG(ERROR) << "New mean const failed."; + return RET_ERROR; + } + auto mean_tensor = mindspore::lite::ConverterToNPUTensor(inputs[3]); + mean->set_attr_value(mean_tensor); + batchnorm_->set_input_mean(*mean); + + auto variance = new (std::nothrow) hiai::op::Const(name_ + "_variance"); + if (variance == nullptr) { + MS_LOG(ERROR) << "New variance const failed."; + return RET_ERROR; + } + auto variance_tensor = mindspore::lite::ConverterToNPUTensor(inputs[4]); + variance->set_attr_value(variance_tensor); + batchnorm_->set_input_variance(*variance); + + batchnorm_->set_attr_epsilon(batchnorm_param_->epsilon_); + batchnorm_->set_attr_momentum(batchnorm_param_->momentum_); + batchnorm_->set_attr_mode(1); + return RET_OK; +} + +ge::Operator *mindspore::kernel::BatchnormNPUKernel::GetNPUOp() { return batchnorm_; } + +BatchnormNPUKernel::~BatchnormNPUKernel() { + if (batchnorm_ != nullptr) { + delete batchnorm_; + batchnorm_ = nullptr; + } +} + +REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_FusedBatchNorm, NPUKernelCreator<BatchnormNPUKernel>) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/npu/batchnorm_npu.h b/mindspore/lite/src/runtime/kernel/npu/batchnorm_npu.h new file mode 100644 index 0000000000..ae77b4c55c --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/npu/batchnorm_npu.h @@ -0,0 +1,47 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
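For reference, the BatchNormExt2 op configured above implements the standard per-channel inference-mode normalization from the scale, offset, mean, and variance constants it is fed. A scalar sketch of that math (assuming inference only, no moving-average update):

```cpp
#include <cmath>

// y = scale * (x - mean) / sqrt(variance + epsilon) + offset, applied
// per channel; epsilon matches batchnorm_param_->epsilon_ above.
float BatchNormInfer(float x, float scale, float offset,
                     float mean, float variance, float epsilon) {
  return scale * (x - mean) / std::sqrt(variance + epsilon) + offset;
}
```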
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_BATCHNORM_NPU_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_BATCHNORM_NPU_H_ + +#include +#include "include/graph/op/all_ops.h" +#include "include/graph/compatible/all_ops.h" +#include "src/runtime/kernel/npu/npu_kernel.h" +#include "nnacl/batchnorm_parameter.h" + +namespace mindspore::kernel { +class BatchnormNPUKernel : public NPUKernel { + public: + BatchnormNPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx, + const mindspore::lite::PrimitiveC *primitive) + : NPUKernel(parameter, inputs, outputs, ctx, primitive) { + batchnorm_param_ = reinterpret_cast(parameter); + } + ~BatchnormNPUKernel() override; + + int IsSupport(const std::vector &inputs, const std::vector &outputs, + OpParameter *opParameter) override; + int SetNPUInputs(const std::vector &inputs, const std::vector &outputs, + const std::vector &npu_inputs) override; + ge::Operator *GetNPUOp() override; + + private: + ge::op::BatchNormExt2 *batchnorm_ = nullptr; + BatchNormParameter *batchnorm_param_; +}; +} // namespace mindspore::kernel +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_BATCHNORM_NPU_H_ diff --git a/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc b/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc index 9e02e6e3d9..e36bf75d61 100644 --- a/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc @@ -24,6 +24,10 @@ using mindspore::schema::PrimitiveType_Conv2D; namespace mindspore::kernel { int ConvolutionNPUKernel::IsSupport(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter) { + if (conv_param_->group_ != 1) { + MS_LOG(WARNING) << "Only support group equals 1 for npu convolution op"; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/npu/deconvolution_npu.cc b/mindspore/lite/src/runtime/kernel/npu/deconvolution_npu.cc new file mode 100644 index 0000000000..ac15301345 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/npu/deconvolution_npu.cc @@ -0,0 +1,105 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
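The group check added to ConvolutionNPUKernel::IsSupport above (and mirrored in the deconvolution kernel below) rejects grouped convolutions up front, so the scheduler keeps the CPU kernel instead of building an NPU op it cannot express. The same guard pattern, reduced to a sketch (RET_OK/RET_ERROR stand in for the MindSpore Lite return codes):

```cpp
#include <cstdio>

constexpr int RET_OK = 0;
constexpr int RET_ERROR = 1;

// Returns RET_ERROR for any group count the NPU convolution op cannot
// handle; the caller then falls back to the CPU implementation.
int CheckNpuConvSupport(int group) {
  if (group != 1) {
    std::fprintf(stderr, "Only group == 1 is supported by the npu convolution op\n");
    return RET_ERROR;
  }
  return RET_OK;
}
```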
+ */ + +#include "src/runtime/kernel/npu/deconvolution_npu.h" +#include "src/runtime/agent/npu/npu_converter_utils.h" + +using mindspore::kernel::KERNEL_ARCH::kNPU; +using mindspore::lite::KernelRegistrar; +using mindspore::schema::PrimitiveType_DeConv2D; + +namespace mindspore::kernel { +int DeconvolutionNPUKernel::IsSupport(const std::vector &inputs, + const std::vector &outputs, OpParameter *opParameter) { + if (conv_param_->group_ != 1) { + MS_LOG(WARNING) << "Only support group equals 1 for npu deconvolution op"; + return RET_ERROR; + } + return RET_OK; +} + +int DeconvolutionNPUKernel::SetConvParam() { + deconv_->set_attr_strides(ge::AttrValue::LIST_INT({conv_param_->stride_h_, conv_param_->stride_w_})); + deconv_->set_attr_dilations(ge::AttrValue::LIST_INT({conv_param_->dilation_h_, conv_param_->dilation_w_})); + deconv_->set_attr_groups(conv_param_->group_); + + if (conv_param_->pad_mode_ == Pad_Same) { + deconv_->set_attr_pad_mode(ge::AttrValue::STR{"SAME"}); + deconv_->set_attr_pads(ge::AttrValue::LIST_INT({0, 0, 0, 0})); + } else if (conv_param_->pad_mode_ == Pad_Valid) { + deconv_->set_attr_pad_mode(ge::AttrValue::STR{"VALID"}); + deconv_->set_attr_pads(ge::AttrValue::LIST_INT({0, 0, 0, 0})); + } else { + deconv_->set_attr_pad_mode(ge::AttrValue::STR{"SPECIFIC"}); + deconv_->set_attr_pads( + ge::AttrValue::LIST_INT({conv_param_->pad_u_, conv_param_->pad_d_, conv_param_->pad_l_, conv_param_->pad_r_})); + } + return RET_OK; +} + +int DeconvolutionNPUKernel::SetNPUInputs(const std::vector &inputs, + const std::vector &outputs, + const std::vector &npu_inputs) { + // set conv attr param + deconv_ = new (std::nothrow) hiai::op::ConvTranspose(name_ + "_deconv"); + if (deconv_ == nullptr) { + MS_LOG(ERROR) << "New deconvolution operator for deconvolution op " << name_ << " failed."; + return RET_ERROR; + } + auto ret = SetConvParam(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Set npu op parameter for deconvolution op " << name_ << " failed."; + return RET_ERROR; + } + + ret = InitWeightBiasConst(inputs); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Set weight and bias for deconvolution op " << name_ << " failed when running npu"; + return RET_ERROR; + } + deconv_->set_input_filter(*weight_); + if (inputs.size() == 3) { + deconv_->set_input_bias(*bias_); + } + deconv_->set_input_x(*npu_inputs[0]); + + if (conv_param_->act_type_ != ActType_No) { + ret = SetActivation(deconv_, conv_param_->act_type_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed."; + return RET_ERROR; + } + } + return RET_OK; +} + +ge::Operator *mindspore::kernel::DeconvolutionNPUKernel::GetNPUOp() { + if (conv_param_->act_type_ == ActType_No) { + return deconv_; + } else { + return act_; + } +} + +DeconvolutionNPUKernel::~DeconvolutionNPUKernel() { + if (deconv_ != nullptr) { + delete deconv_; + deconv_ = nullptr; + } +} + +REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_DeConv2D, NPUKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/npu/deconvolution_npu.h b/mindspore/lite/src/runtime/kernel/npu/deconvolution_npu.h new file mode 100644 index 0000000000..a1e4a1ad91 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/npu/deconvolution_npu.h @@ -0,0 +1,47 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_DECONVOLUTION_NPU_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_DECONVOLUTION_NPU_H_ + +#include +#include "include/graph/op/all_ops.h" +#include "src/runtime/kernel/npu/convolution_base_npu.h" +#include "nnacl/conv_parameter.h" + +namespace mindspore::kernel { +class DeconvolutionNPUKernel : public ConvolutionBaseNPUKernel { + public: + DeconvolutionNPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx, + const mindspore::lite::PrimitiveC *primitive) + : ConvolutionBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) { + conv_param_ = reinterpret_cast(parameter); + } + ~DeconvolutionNPUKernel() override; + + int IsSupport(const std::vector &inputs, const std::vector &outputs, + OpParameter *opParameter) override; + int SetNPUInputs(const std::vector &inputs, const std::vector &outputs, + const std::vector &npu_inputs) override; + ge::Operator *GetNPUOp() override; + + private: + int SetConvParam(); + hiai::op::ConvTranspose *deconv_ = nullptr; + ConvParameter *conv_param_; +}; +} // namespace mindspore::kernel +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_DECONVOLUTION_NPU_H_ diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc index cd108f2221..dcd9e8b843 100644 --- a/mindspore/lite/src/scheduler.cc +++ b/mindspore/lite/src/scheduler.cc @@ -37,8 +37,7 @@ #include "src/runtime/agent/npu/optimizer/npu_pass_manager.h" #include "src/runtime/agent/npu/optimizer/npu_transform_pass.h" #include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h" -#include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h" -#include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h" +#include "src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h" #endif namespace mindspore::lite { using kernel::KERNEL_ARCH::kCPU; @@ -570,9 +569,7 @@ int Scheduler::RunPass(std::vector *dst_kernels) { #if SUPPORT_NPU auto transform_pass = new NPUTransformPass(context_, dst_kernels, src_tensors_); mindspore::lite::NPUPassManager::GetInstance()->AddPass(transform_pass); - auto add_format_pass = new NPUAddTransformPass(context_, dst_kernels, src_tensors_); - mindspore::lite::NPUPassManager::GetInstance()->AddPass(add_format_pass); - auto concat_format_pass = new NPUConcatTransformPass(context_, dst_kernels, src_tensors_); + auto concat_format_pass = new NPUInsertTransformPass(context_, dst_kernels, src_tensors_); mindspore::lite::NPUPassManager::GetInstance()->AddPass(concat_format_pass); auto fusion_pass = new NPUFusionPass(dst_kernels); mindspore::lite::NPUPassManager::GetInstance()->AddPass(fusion_pass);
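After this change the scheduler registers three NPU passes in a fixed order: NPUTransformPass wraps NCHW-only ops in transposes, NPUInsertTransformPass patches Concat/Add nodes whose neighbours disagree on layout, and NPUFusionPass runs last so it can cancel the redundant transpose pairs the first two passes created. A minimal sketch of that manager pattern (toy interface, not the real NPUPassManager API):

```cpp
#include <memory>
#include <vector>

struct Pass {
  virtual ~Pass() = default;
  virtual int Run() = 0;  // returns 0 on success, like RET_OK
};

// Runs passes in registration order; ordering matters because fusion
// depends on the transposes the insertion passes have just added.
int RunNpuPasses(std::vector<std::unique_ptr<Pass>> &passes) {
  for (auto &pass : passes) {
    int ret = pass->Run();
    if (ret != 0) {
      return ret;
    }
  }
  return 0;
}
```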