diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
index a2c524bfdc..e0150316a7 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
@@ -38,7 +38,24 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
   return RET_OK;
 }
 
-bool IsSameShapeTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
+std::vector<int> GetNpuTensorShape(int dim, std::shared_ptr<hiai::AiTensor> npu_tensor) {
+  std::vector<int> npu_shape;
+  if (dim > 0) {
+    npu_shape.push_back(npu_tensor->GetTensorDimension().GetNumber());
+  }
+  if (dim > 1) {
+    npu_shape.push_back(npu_tensor->GetTensorDimension().GetChannel());
+  }
+  if (dim > 2) {
+    npu_shape.push_back(npu_tensor->GetTensorDimension().GetHeight());
+  }
+  if (dim > 3) {
+    npu_shape.push_back(npu_tensor->GetTensorDimension().GetWidth());
+  }
+  return npu_shape;
+}
+
+bool IsSameShapeInTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
   if (tensor->shape().size() > 4) {
     MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4";
     return false;
@@ -49,18 +66,15 @@ bool IsSameShapeTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
     return tensor->Batch() == npu_tensor->GetTensorDimension().GetNumber() &&
            tensor->Channel() == npu_tensor->GetTensorDimension().GetChannel() &&
            tensor->Height() == npu_tensor->GetTensorDimension().GetHeight() &&
            tensor->Width() == npu_tensor->GetTensorDimension().GetWidth();
   }
-  std::vector<int> npu_shape;
-  auto dim = tensor->shape().size();
-  if (dim > 0) {
-    npu_shape.push_back(npu_tensor->GetTensorDimension().GetNumber());
-  }
-  if (dim > 1) {
-    npu_shape.push_back(npu_tensor->GetTensorDimension().GetChannel());
-  }
-  if (dim > 2) {
-    npu_shape.push_back(npu_tensor->GetTensorDimension().GetHeight());
+  return GetNpuTensorShape(tensor->shape().size(), npu_tensor) == tensor->shape();
+}
+
+bool IsSameShapeOutTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
+  if (tensor->shape().size() > 4) {
+    MS_LOG(ERROR) << "Npu does not support output tensor dims greater than 4";
+    return false;
   }
-  return npu_shape == tensor->shape();
+  return GetNpuTensorShape(tensor->shape().size(), npu_tensor) == tensor->shape();
 }
@@ -72,10 +86,10 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
   for (int i = 0; i < npu_input_tensors_.size(); ++i) {
     int index = 0;
     for (; index < in_tensors.size(); index++) {
-      if (!inputs_visited[index] && IsSameShapeTensor(in_tensors[index], npu_input_tensors_[i])) {
+      if (!inputs_visited[index] && IsSameShapeInTensor(in_tensors[index], npu_input_tensors_[i])) {
         void *data = in_tensors[index]->data_c();
         if (data == nullptr) {
-          MS_LOG(ERROR) << model_name_ << " Inputs data is nullptr";
+          MS_LOG(ERROR) << "For " << model_name_ << ", the " << i << "th input data is nullptr";
           return RET_ERROR;
         }
 
@@ -106,14 +120,28 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
     return RET_ERROR;
   }
 
+  std::vector<bool> outputs_visited(out_tensors.size(), false);
   for (int i = 0; i < npu_output_tensors_.size(); ++i) {
-    void *data = out_tensors[i]->MutableData();
-    if (data == nullptr) {
-      MS_LOG(ERROR) << "Malloc buffer failed.";
-      return RET_ERROR;
+    int index = 0;
+    for (; index < out_tensors.size(); index++) {
+      if (!outputs_visited[index] && IsSameShapeOutTensor(out_tensors[index], npu_output_tensors_[i])) {
+        void *data = out_tensors[index]->MutableData();
+        if (data == nullptr) {
+          MS_LOG(ERROR) << "For " << model_name_ << ", the " << i << "th output data is nullptr";
+          return RET_ERROR;
+        }
+
+        memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
+        out_tensors[index]->ResetRefCount();
+        outputs_visited[index] = true;
+        break;
+      }
     }
-    memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
-    out_tensors[i]->ResetRefCount();
+    if (index == out_tensors.size()) {
+      MS_LOG(ERROR) << "Can't find corresponding ms lite tensor of " << i << " output tensor for npu executor "
+                    << model_name_;
+      return RET_ERROR;
+    }
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc
index bbfb293d88..300439c948 100644
--- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc
@@ -82,7 +82,7 @@ int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels) {
   auto nhwc_shape = kernel->out_tensors()[0]->shape();
   std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]};
   auto tensor =
-      new (std::nothrow) Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR);
+      new (std::nothrow) Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR);
   if (tensor == nullptr) {
     MS_LOG(ERROR) << "New nchw tensor failed when inserting post nchw2nhwc kernel.";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc b/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc
index 79d86d2260..b31b9034e8 100644
--- a/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc
@@ -25,7 +25,7 @@ namespace mindspore::kernel {
 int ScaleNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                               OpParameter *opParameter) {
   if (scale_parameter_->axis_ < 0) {
-    scale_parameter_->axis_ = scale_parameter_->axis_ + inputs.size();
+    scale_parameter_->axis_ = scale_parameter_->axis_ + inputs[0]->shape().size();
   }
   if (scale_parameter_->axis_ != 1) {
     MS_LOG(ERROR) << "Npu scale axis attr only support 1, now is " << scale_parameter_->axis_;