|
|
|
@ -38,7 +38,24 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
|
|
|
|
|
return RET_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool IsSameShapeTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
|
|
|
|
|
std::vector<int> GetNpuTensorShape(int dim, std::shared_ptr<hiai::AiTensor> npu_tensor) {
|
|
|
|
|
std::vector<int> npu_shape;
|
|
|
|
|
if (dim > 0) {
|
|
|
|
|
npu_shape.push_back(npu_tensor->GetTensorDimension().GetNumber());
|
|
|
|
|
}
|
|
|
|
|
if (dim > 1) {
|
|
|
|
|
npu_shape.push_back(npu_tensor->GetTensorDimension().GetChannel());
|
|
|
|
|
}
|
|
|
|
|
if (dim > 2) {
|
|
|
|
|
npu_shape.push_back(npu_tensor->GetTensorDimension().GetHeight());
|
|
|
|
|
}
|
|
|
|
|
if (dim > 3) {
|
|
|
|
|
npu_shape.push_back(npu_tensor->GetTensorDimension().GetWidth());
|
|
|
|
|
}
|
|
|
|
|
return npu_shape;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Checks whether a lite input tensor has the same shape as the given NPU
// input tensor. NPU tensors support at most 4 dimensions, so any lite tensor
// with more than 4 dims is rejected up front.
//
// NOTE(review): this span previously contained merge/diff residue (a stray
// hunk header, leftover lines of the removed IsSameShapeTensor, and dead
// shape-building code using an undeclared `npu_shape`); only the intended
// final logic is kept here.
//
// @param tensor     Lite input tensor to compare (non-null).
// @param npu_tensor NPU tensor providing the reference dimensions.
// @return true when the shapes match; false on mismatch or when the lite
//         tensor has more than 4 dimensions.
bool IsSameShapeInTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
  if (tensor->shape().size() > 4) {
    MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4";
    return false;
  }
  return GetNpuTensorShape(tensor->shape().size(), npu_tensor) == tensor->shape();
}
|
|
|
|
|
|
|
|
|
|
// Checks whether a lite output tensor has the same shape as the given NPU
// output tensor. NPU tensors support at most 4 dimensions, so any lite tensor
// with more than 4 dims is rejected up front.
//
// NOTE(review): removed a stale removed-side diff line
// (`return npu_shape == tensor->shape();`) that referenced an undeclared
// variable and preceded the intended return statement.
//
// @param tensor     Lite output tensor to compare (non-null).
// @param npu_tensor NPU tensor providing the reference dimensions.
// @return true when the shapes match; false on mismatch or when the lite
//         tensor has more than 4 dimensions.
bool IsSameShapeOutTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
  if (tensor->shape().size() > 4) {
    MS_LOG(ERROR) << "Npu does not support output tensor dims greater than 4";
    return false;
  }
  return GetNpuTensorShape(tensor->shape().size(), npu_tensor) == tensor->shape();
}
|
|
|
|
|
|
|
|
|
|
int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
|
|
|
@ -72,10 +86,10 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
|
|
|
|
|
for (int i = 0; i < npu_input_tensors_.size(); ++i) {
|
|
|
|
|
int index = 0;
|
|
|
|
|
for (; index < in_tensors.size(); index++) {
|
|
|
|
|
if (!inputs_visited[index] && IsSameShapeTensor(in_tensors[index], npu_input_tensors_[i])) {
|
|
|
|
|
if (!inputs_visited[index] && IsSameShapeInTensor(in_tensors[index], npu_input_tensors_[i])) {
|
|
|
|
|
void *data = in_tensors[index]->data_c();
|
|
|
|
|
if (data == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << model_name_ << " Inputs data is nullptr";
|
|
|
|
|
MS_LOG(ERROR) << "For " << model_name_ << ", the " << i << "th input data is nullptr";
|
|
|
|
|
return RET_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -106,14 +120,28 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
|
|
|
|
|
return RET_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<bool> outputs_visited(out_tensors.size(), false);
|
|
|
|
|
for (int i = 0; i < npu_output_tensors_.size(); ++i) {
|
|
|
|
|
void *data = out_tensors[i]->MutableData();
|
|
|
|
|
if (data == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << "Malloc buffer failed.";
|
|
|
|
|
return RET_ERROR;
|
|
|
|
|
int index = 0;
|
|
|
|
|
for (; index < out_tensors.size(); index++) {
|
|
|
|
|
if (!outputs_visited[index] && IsSameShapeOutTensor(out_tensors[index], npu_output_tensors_[i])) {
|
|
|
|
|
void *data = out_tensors[index]->MutableData();
|
|
|
|
|
if (data == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << "For " << model_name_ << ", the " << i << "th output data is nullptr";
|
|
|
|
|
return RET_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
|
|
|
|
|
out_tensors[index]->ResetRefCount();
|
|
|
|
|
outputs_visited[index] = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (index == out_tensors.size()) {
|
|
|
|
|
MS_LOG(ERROR) << "Can't find corresponding ms lite tensor of " << i << " output tensor for npu executor "
|
|
|
|
|
<< model_name_;
|
|
|
|
|
return RET_ERROR;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
|
|
|
|
|
out_tensors[i]->ResetRefCount();
|
|
|
|
|
}
|
|
|
|
|
return RET_OK;
|
|
|
|
|
}
|
|
|
|
|