diff --git a/mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.cc
new file mode 100644
index 0000000000..6ae717cb48
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.cc
@@ -0,0 +1,129 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/agent/npu/npu_add_transform_pass.h"
+#include "src/runtime/agent/npu/npu_pass_utils.h"
+namespace mindspore::lite {
+using kernel::KERNEL_ARCH::kNPU;
+int NPUAddTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                                       kernel::LiteKernel *after_kernel) {
+  std::vector<kernel::LiteKernel *> out_kernels;
+
+  for (auto out_kernel : kernel->out_kernels()) {
+    if (out_kernel == after_kernel) {
+      out_kernels.push_back(trans_kernel);
+    } else {
+      out_kernels.push_back(out_kernel);
+    }
+  }
+  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors());
+  return RET_OK;
+}
+
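+// Splice a Nhwc2Nchw/Nchw2Nhwc transpose pair between the Add kernel and
+// every consumer that is not itself a Nhwc2Nchw transpose.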
+int NPUAddTransformPass::InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
+                                    std::vector<kernel::LiteKernel *> *all_kernels,
+                                    std::vector<Tensor *> *all_tensors) {
+  auto kernel = *it;
+  for (auto out_kernel : kernel->out_kernels()) {
+    if (out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) {
+      continue;
+    }
+
+    std::vector<int> nh2nc_shape = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[3],
+                                    kernel->out_tensors()[0]->shape()[1], kernel->out_tensors()[0]->shape()[2]};
+    auto nh2nc_tensor =
+      new Tensor(kernel->out_tensors()[0]->data_type(), nh2nc_shape, schema::Format_NHWC, Tensor::VAR);
+    std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor};
+    all_tensors->push_back(nh2nc_tensors[0]);
+
+    auto nc2nh_shape = {nh2nc_shape[0], nh2nc_shape[2], nh2nc_shape[3], nh2nc_shape[1]};
+    auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nc2nh_shape, schema::Format_NCHW, Tensor::VAR);
+    std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor};
+    all_tensors->push_back(nc2nh_tensors[0]);
+
+    auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
+    auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
+    all_kernels->push_back(nh2nc_kernel);
+    auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
+    auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
+    all_kernels->push_back(nc2nh_kernel);
+    NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
+    NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
+    UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
+    UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, out_kernel);
+  }
+  return RET_OK;
+}
+
+int NPUAddTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                                         kernel::LiteKernel *next_kernel) {
+  std::vector<Tensor *> next_in_tensors;
+  for (auto next_in_tensor : next_kernel->in_tensors()) {
+    if (next_in_tensor != kernel->out_tensors()[0]) {
+      next_in_tensors.push_back(next_in_tensor);
+    } else {
+      next_in_tensors.push_back(trans_kernel->out_tensors()[0]);
+    }
+  }
+  next_kernel->set_in_tensors(next_in_tensors);
+  std::vector<kernel::LiteKernel *> next_in_kernels;
+  for (auto in_kernel : next_kernel->in_kernels()) {
+    if (in_kernel == kernel) {
+      next_in_kernels.push_back(trans_kernel);
+    } else {
+      next_in_kernels.push_back(in_kernel);
+    }
+  }
+  NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors,
+                             next_kernel->out_tensors());
+  return RET_OK;
+}
+
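+// Scan the graph for NPU Add kernels with two or more consumers; when not
+// every consumer is a Nhwc2Nchw transpose, insert transpose pairs behind them.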
+int NPUAddTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
+                             std::vector<Tensor *> *all_tensors) {
+  if (context->IsNpuEnabled()) {
+    std::vector<kernel::LiteKernel *> new_kernels;
+
+    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
+      auto kernel = *it;
+      new_kernels.push_back(kernel);
+      if (kernel->desc().arch != kNPU) {
+        continue;
+      }
+      if (kernel->Type() == schema::PrimitiveType_Add && kernel->out_kernels().size() >= 2) {
+        int sum = 0;
+        for (auto i : kernel->out_kernels()) {
+          if (i->Type() == schema::PrimitiveType_Nhwc2Nchw) {
+            sum++;
+          }
+        }
+        if (kernel->out_kernels().size() != sum) {
+          InsertNode(context, it, &new_kernels, all_tensors);
+        }
+      }
+    }
+
+    all_kernels->clear();
+    for (int i = 0; i < new_kernels.size(); i++) {
+      all_kernels->push_back(new_kernels[i]);
+    }
+  }
+  return RET_OK;
+}
+}  // namespace mindspore::lite
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.h b/mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.h
new file mode 100644
index 0000000000..bc7ff39f1b
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.h
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
+#include <vector>
+#include "src/lite_kernel.h"
+#include "src/ops/primitive_c.h"
+namespace mindspore::lite {
+class NPUAddTransformPass {
+ public:
+  int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
+          std::vector<Tensor *> *all_tensors);
+
+ private:
+  int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                    kernel::LiteKernel *after_kernel);
+
+  int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                      kernel::LiteKernel *next_kernel);
+
+  int InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
+                 std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);
+
+ private:
+  int total = 0;
+};
+}  // namespace mindspore::lite
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.cc
new file mode 100644
index 0000000000..bf35aa2215
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.cc
@@ -0,0 +1,129 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/agent/npu/npu_concat_transform_pass.h"
+#include "src/runtime/agent/npu/npu_pass_utils.h"
+namespace mindspore::lite {
+using kernel::KERNEL_ARCH::kNPU;
+int NPUConcatTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                                          kernel::LiteKernel *after_kernel) {
+  std::vector<kernel::LiteKernel *> out_kernels;
+  for (auto out_kernel : kernel->out_kernels()) {
+    if (out_kernel == after_kernel) {
+      out_kernels.push_back(trans_kernel);
+    } else {
+      out_kernels.push_back(out_kernel);
+    }
+  }
+  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors());
+  return RET_OK;
+}
+
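+// Splice a Nhwc2Nchw/Nchw2Nhwc transpose pair between the Concat kernel and
+// every producer that is not itself a Nchw2Nhwc transpose.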
+int NPUConcatTransformPass::InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
+                                       std::vector<kernel::LiteKernel *> *all_kernels,
+                                       std::vector<Tensor *> *all_tensors) {
+  for (auto kernel : (*it)->in_kernels()) {
+    if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) {
+      continue;
+    }
+    auto out_kernel = (*it);
+    std::vector<int> nh2nc_shape = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[3],
+                                    kernel->out_tensors()[0]->shape()[1], kernel->out_tensors()[0]->shape()[2]};
+    auto nh2nc_tensor =
+      new Tensor(kernel->out_tensors()[0]->data_type(), nh2nc_shape, schema::Format_NHWC, Tensor::VAR);
+    std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor};
+    all_tensors->push_back(nh2nc_tensors[0]);
+
+    auto nc2nh_shape = {nh2nc_shape[0], nh2nc_shape[2], nh2nc_shape[3], nh2nc_shape[1]};
+    auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nc2nh_shape, schema::Format_NCHW, Tensor::VAR);
+    std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor};
+    all_tensors->push_back(nc2nh_tensors[0]);
+
+    auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
+    auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
+    all_kernels->push_back(nh2nc_kernel);
+    auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
+    auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
+    all_kernels->push_back(nc2nh_kernel);
+    NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
+    NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
+    UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
+    UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, out_kernel);
+  }
+  return RET_OK;
+}
+
+int NPUConcatTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel,
+                                                            kernel::LiteKernel *trans_kernel,
+                                                            kernel::LiteKernel *next_kernel) {
+  std::vector<Tensor *> next_in_tensors;
+  for (auto next_in_tensor : next_kernel->in_tensors()) {
+    if (next_in_tensor != kernel->out_tensors()[0]) {
+      next_in_tensors.push_back(next_in_tensor);
+    } else {
+      next_in_tensors.push_back(trans_kernel->out_tensors()[0]);
+    }
+  }
+  next_kernel->set_in_tensors(next_in_tensors);
+  std::vector<kernel::LiteKernel *> next_in_kernels;
+  for (auto in_kernel : next_kernel->in_kernels()) {
+    if (in_kernel == kernel) {
+      next_in_kernels.push_back(trans_kernel);
+    } else {
+      next_in_kernels.push_back(in_kernel);
+    }
+  }
+  NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors,
+                             next_kernel->out_tensors());
+  return RET_OK;
+}
+
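+// Scan the graph for NPU Concat kernels fed by two or more producers and,
+// unless every producer already goes through a Nchw2Nhwc transpose, insert
+// transpose pairs in front of the Concat via InsertNode.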
+int NPUConcatTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
+                                std::vector<Tensor *> *all_tensors) {
+  if (context->IsNpuEnabled()) {
+    std::vector<kernel::LiteKernel *> new_kernels;
+
+    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
+      auto kernel = *it;
+      if (kernel->desc().arch != kNPU) {
+        new_kernels.push_back(kernel);
+        continue;
+      }
+      if (kernel->Type() == schema::PrimitiveType_Concat && kernel->in_kernels().size() >= 2) {
+        int sum = 0;
+        for (auto i : kernel->in_kernels()) {
+          if (i->Type() == schema::PrimitiveType_Nchw2Nhwc) {
+            sum++;
+          }
+        }
+        if (kernel->out_kernels().size() != sum) {
+          InsertNode(context, it, &new_kernels, all_tensors);
+        }
+      }
+      new_kernels.push_back(kernel);
+    }
+
+    all_kernels->clear();
+    for (int i = 0; i < new_kernels.size(); i++) {
+      all_kernels->push_back(new_kernels[i]);
+    }
+  }
+  return RET_OK;
+}
+}  // namespace mindspore::lite
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.h b/mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.h
new file mode 100644
index 0000000000..7a15879cd0
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.h
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
+#include <vector>
+#include "src/lite_kernel.h"
+#include "src/ops/primitive_c.h"
+namespace mindspore::lite {
+class NPUConcatTransformPass {
+ public:
+  int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
+          std::vector<Tensor *> *all_tensors);
+
+ private:
+  int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                    kernel::LiteKernel *after_kernel);
+
+  int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                      kernel::LiteKernel *next_kernel);
+
+  int InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
+                 std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);
+
+ private:
+  int total = 0;
+};
+}  // namespace mindspore::lite
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.cc b/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.cc
index 0028fbceae..ab408dbcde 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.cc
+++ b/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.cc
@@ -106,24 +106,19 @@ void UpdatePreTensors(kernel::LiteKernel *cur_kernel) {
 }
 
 void UpdatePostTensors(kernel::LiteKernel *cur_kernel) {
-  auto tensors_vec = cur_kernel->out_tensors();
+  auto tensor = cur_kernel->out_tensors()[0];
   for (auto out_kernel : cur_kernel->out_kernels()) {
-    auto in_tensor = out_kernel->in_tensors()[0];
     auto out_tensor = out_kernel->out_tensors()[0];
-    auto post_kernel = out_kernel->out_kernels()[0];
-    lite::Tensor *cur_tensor = nullptr;
-    for (size_t i = 0; i < post_kernel->in_tensors().size(); i++) {
-      if (post_kernel->in_tensors()[i] == out_tensor) {
-        cur_tensor = post_kernel->in_tensors()[i];
-      }
-    }
-    for (size_t i = 0; i < tensors_vec.size(); i++) {
-      if (tensors_vec[i] == in_tensor) {
-        tensors_vec[i] = cur_tensor;
+    for (auto post_kernel : out_kernel->out_kernels()) {
+      auto tensors_vec = post_kernel->in_tensors();
+      for (int i = 0; i < tensors_vec.size(); i++) {
+        if (tensors_vec[i] == out_tensor) {
+          tensors_vec[i] = tensor;
+        }
       }
+      post_kernel->set_in_tensors(tensors_vec);
     }
   }
-  cur_kernel->set_out_tensors(tensors_vec);
 }
 
 int TransFormAxis(int axis) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc
index e8ffc1aa46..eeb732b436 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc
@@ -31,7 +31,8 @@ namespace mindspore::kernel {
 int ConcatBaseCPUKernel::Init() { return RET_OK; }
 
 int ConcatBaseCPUKernel::ReSize() {
-  axis_ = concat_param_->axis_ >= 0 ? concat_param_->axis_ : in_tensors_.front()->shape().size() + concat_param_->axis_;
+  concat_param_->axis_ =
+    concat_param_->axis_ >= 0 ? concat_param_->axis_ : in_tensors_.front()->shape().size() + concat_param_->axis_;
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.h b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.h
index cd3b90b467..3a72f2d548 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.h
@@ -43,7 +43,6 @@ class ConcatBaseCPUKernel : public LiteKernel {
   int Run() override { return 0; }
 
  protected:
-  int axis_ = 0;
   const InnerContext *ctx_ = nullptr;
   int thread_count_ = 1;
   ConcatParameter *concat_param_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
index 72519e5045..304705adaf 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
@@ -120,8 +120,8 @@ int ConcatFp16CPUKernel::Run() {
     fp16_output_ = reinterpret_cast<float16_t *>(out_tensors_.at(0)->MutableData());
   }
   int dtype_len = in_tensors_.at(0)->data_type() == kNumberTypeInt32 ? sizeof(int32_t) : sizeof(float16_t);
-  ConcatFp16(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, axis_, inputs_output_shape.data(),
-             output_shape.size(), reinterpret_cast<void *>(fp16_output_), dtype_len);
+  ConcatFp16(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, concat_param_->axis_,
+             inputs_output_shape.data(), output_shape.size(), reinterpret_cast<void *>(fp16_output_), dtype_len);
 
   if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32 || out_tensors_.at(0)->data_type() == kNumberTypeFloat) {
     Float16ToFloat32(fp16_output_, reinterpret_cast<float *>(output_addr), out_tensors_.at(0)->ElementsNum());
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
index 9c8664f9a1..bbbb69bd10 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
@@ -59,8 +59,8 @@ int ConcatCPUKernel::DoConcat(int task_id) {
   inputs_output_shape[input_num] = output_shape.data();
   auto output_addr = out_tensors_.at(0)->MutableData();
 
-  Concat(inputs_addr.data(), input_num, axis_, inputs_output_shape.data(), output_shape.size(), output_addr, task_id,
-         thread_count_);
+  Concat(inputs_addr.data(), input_num, concat_param_->axis_, inputs_output_shape.data(), output_shape.size(),
+         output_addr, task_id, thread_count_);
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc
index 9dd72bef06..7e1549460e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc
@@ -88,7 +88,7 @@ int ConcatInt8CPUKernel::ReSize() {
   }
 
   before_axis_size = 1;
-  for (int i = 0; i < axis_; i++) {
+  for (int i = 0; i < concat_param_->axis_; i++) {
     before_axis_size *= out_tensors_.at(kOutputIndex)->DimensionSize(i);
   }
 
@@ -104,7 +104,7 @@ int ConcatInt8CPUKernel::ReSize() {
   memcpy(reinterpret_cast<void *>(concat_param_->output_shapes_), output_tensor->shape().data(),
          sizeof(int) * output_dim);
 
-  for (size_t i = axis_ + 1; i < output_dim; i++) {
+  for (size_t i = concat_param_->axis_ + 1; i < output_dim; i++) {
     after_axis_size *= concat_param_->output_shapes_[i];
   }
   concat_param_->after_axis_size = after_axis_size;
@@ -137,7 +137,7 @@ int ConcatInt8CPUKernel::DoExecute(int task_id) {
   if (real_dst_count <= 0) {
     return lite::RET_OK;
   }
-  Int8Concat(input_data_, output_data_, concat_param_, axis_, real_dst_count, task_id);
+  Int8Concat(input_data_, output_data_, concat_param_, concat_param_->axis_, real_dst_count, task_id);
 
   return lite::RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/npu/concat_npu.cc b/mindspore/lite/src/runtime/kernel/npu/concat_npu.cc
index 7161aac429..e094ef31ad 100644
--- a/mindspore/lite/src/runtime/kernel/npu/concat_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/concat_npu.cc
@@ -24,7 +24,7 @@ using mindspore::schema::PrimitiveType_Concat;
 namespace mindspore::kernel {
 int ConcatNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                OpParameter *opParameter) {
-  return RET_ERROR;
+  return RET_OK;
 }
 
 int ConcatNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
@@ -34,7 +34,7 @@ int ConcatNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
     MS_LOG(ERROR) << name_ << " op is nullptr";
     return RET_ERROR;
   }
-  op_->set_attr_concat_dim(axis_);
+  op_->set_attr_concat_dim(concat_param_->axis_);
   op_->set_attr_N(npu_inputs.size());
   op_->create_dynamic_input_x(npu_inputs.size());
   for (int i = 0; i < npu_inputs.size(); ++i) {
diff --git a/mindspore/lite/src/runtime/kernel/npu/concat_npu.h b/mindspore/lite/src/runtime/kernel/npu/concat_npu.h
index a61344a376..4a27fba4aa 100644
--- a/mindspore/lite/src/runtime/kernel/npu/concat_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/concat_npu.h
@@ -20,6 +20,7 @@
 #include "nnacl/concat_parameter.h"
 #include "src/runtime/kernel/npu/npu_kernel.h"
 #include "include/graph/op/all_ops.h"
+
 namespace mindspore::kernel {
 class ConcatNPUKernel : public NPUKernel {
  public:
@@ -27,8 +28,7 @@ class ConcatNPUKernel : public NPUKernel {
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
       : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
-    auto concat_parameter = reinterpret_cast<ConcatParameter *>(parameter);
-    axis_ = concat_parameter->axis_;
+    concat_param_ = reinterpret_cast<ConcatParameter *>(parameter);
   }
   ~ConcatNPUKernel() override;
 
@@ -40,7 +40,7 @@ class ConcatNPUKernel : public NPUKernel {
 
  private:
   hiai::op::ConcatD *op_ = nullptr;
-  int axis_;
+  ConcatParameter *concat_param_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CONCAT_NPU_H_
diff --git a/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc b/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc
index 25d6b4c301..d3063971d0 100644
--- a/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc
@@ -24,7 +24,7 @@ using mindspore::schema::PrimitiveType_Pooling;
 namespace mindspore::kernel {
 int PoolingNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                 OpParameter *opParameter) {
-  return RET_ERROR;
+  return RET_OK;
 }
 
 int PoolingNPUKernel::SetPoolingParam() {
diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc
index 3282708f1c..2b5bf7b908 100644
--- a/mindspore/lite/src/scheduler.cc
+++ b/mindspore/lite/src/scheduler.cc
@@ -35,6 +35,8 @@
 #include "src/runtime/agent/npu/npu_manager.h"
 #include "src/runtime/agent/npu/npu_transform_pass.h"
 #include "src/runtime/agent/npu/npu_fusion_pass.h"
+#include "src/runtime/agent/npu/npu_add_transform_pass.h"
+#include "src/runtime/agent/npu/npu_concat_transform_pass.h"
 #endif
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kCPU;
@@ -532,6 +534,23 @@ int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
     MS_LOG(ERROR) << "Run npu format transform pass failed.";
     return ret;
   }
+
+  auto add_format_pass = new NPUAddTransformPass;
+  ret = add_format_pass->Run(context_, dst_kernels, &src_tensors_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Run npu add op insert transform pass failed.";
+    return ret;
+  }
+  delete add_format_pass;
+
+  auto concat_format_pass = new NPUConcatTransformPass;
+  ret = concat_format_pass->Run(context_, dst_kernels, &src_tensors_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Run npu concat op insert transform pass failed.";
+    return ret;
+  }
+  delete concat_format_pass;
+
   auto fusion_pass = new NPUFusionPass(dst_kernels);
   ret = fusion_pass->Fusion();
   if (ret != RET_OK) {