From: @yeyunpeng2020
commit 5ac88ad93a (pull/10585/MERGE), committed by mindspore-ci-bot via Gitee

@@ -49,6 +49,7 @@ bool NPUManager::CheckEMUIVersion() {
auto version = emui_str.substr(pos + 1);
int ret = CompareVersion(version, "10.0.0");
if (ret < 0) {
MS_LOG(WARNING) << "EMUI version " << version << " less than 10.0.0";
return false;
}
}
@@ -80,8 +81,9 @@ bool NPUManager::CheckDDKVersion() {
auto client = std::make_shared<hiai::AiModelMngerClient>();
if (client->GetVersion() != nullptr) {
std::string version = client->GetVersion();
int ret = CompareVersion(version, "100.330.010.011");
int ret = CompareVersion(version, "100.320.010.023");
if (ret < 0) {
MS_LOG(WARNING) << "DDK Version " << version << " less than 100.320.010.023";
return false;
}
}
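Both version checks funnel through CompareVersion. Below is a minimal sketch of the comparator's assumed contract (field-by-field numeric compare of dotted version strings, negative when the left side is older); CompareVersionSketch is hypothetical, the real helper lives in npu_manager.cc:

#include <sstream>
#include <string>

int CompareVersionSketch(const std::string &lhs, const std::string &rhs) {
  std::istringstream l(lhs), r(rhs);
  std::string a, b;
  for (;;) {
    bool la = static_cast<bool>(std::getline(l, a, '.'));
    bool rb = static_cast<bool>(std::getline(r, b, '.'));
    if (!la && !rb) return 0;       // both exhausted: versions are equal
    int x = la ? std::stoi(a) : 0;  // a missing field compares as 0
    int y = rb ? std::stoi(b) : 0;
    if (x != y) return x < y ? -1 : 1;
  }
}

Under this contract, CompareVersionSketch("100.320.010.011", "100.320.010.023") returns -1, which is exactly the warning path taken above.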
@@ -96,7 +98,7 @@ bool NPUManager::IsSupportNPU() {
MS_LOG(INFO) << "The current device support NPU.";
} else {
is_support_ = false;
MS_LOG(INFO) << "The current device NOT SUPPORT NPU.";
MS_LOG(WARNING) << "The current device NOT SUPPORT NPU.";
}
return is_support_;
} else {
@@ -130,6 +132,7 @@ bool NPUManager::IsKirinChip() {
cpu_info.close();
return true;
} else {
MS_LOG(WARNING) << "Unsupported KirinChip " << kirin_number;
cpu_info.close();
return false;
}
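For context, IsKirinChip scans /proc/cpuinfo for the SoC name and extracts the chip number checked above. A rough sketch of that kind of probe follows; the "Hardware" line format and the supported-chip set here are illustrative assumptions, not the list NPUManager actually uses:

#include <cstdlib>
#include <fstream>
#include <set>
#include <string>

bool LooksLikeSupportedKirin() {
  std::ifstream cpu_info("/proc/cpuinfo");
  std::string line;
  while (std::getline(cpu_info, line)) {
    auto pos = line.find("Kirin");
    if (pos == std::string::npos) continue;
    int kirin_number = std::atoi(line.substr(pos + 5).c_str());    // digits after "Kirin"
    static const std::set<int> kSupported = {810, 820, 985, 990};  // illustrative set only
    return kSupported.count(kirin_number) > 0;
  }
  return false;
}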

@@ -15,6 +15,7 @@
*/
#include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h"
#include <vector>
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
#include "src/lite_kernel.h"
#include "nnacl/concat_parameter.h"
@@ -22,14 +23,16 @@ namespace mindspore::lite {
bool CheckFusion(kernel::LiteKernel *kernel) {
auto pre_flag =
std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *in_kernel) {
return in_kernel->Type() == schema::PrimitiveType_Nchw2Nhwc && in_kernel->out_kernels().size() == 1;
return NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(in_kernel)) &&
in_kernel->out_kernels().size() == 1;
});
if (!pre_flag) {
return false;
}
auto post_flag = std::all_of(
kernel->out_kernels().begin(), kernel->out_kernels().end(),
[](const kernel::LiteKernel *out_kernel) { return out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; });
auto post_flag =
std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *out_kernel) {
return NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(out_kernel));
});
return post_flag;
}
@@ -37,15 +40,17 @@ bool CheckFormatFusion(kernel::LiteKernel *kernel) {
if (kernel->out_kernels().empty()) {
return false;
}
if (kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) {
return std::all_of(
kernel->out_kernels().begin(), kernel->out_kernels().end(),
[](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc; });
if (NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel))) {
return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
[](const kernel::LiteKernel *kernel) {
return NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel));
});
}
if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) {
return std::all_of(
kernel->out_kernels().begin(), kernel->out_kernels().end(),
[](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; });
if (NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel))) {
return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
[](const kernel::LiteKernel *kernel) {
return NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel));
});
}
return false;
}
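Taken together, the two predicates look for a "transpose sandwich": every producer of the kernel is an NCHW-to-NHWC transpose feeding only this kernel, and every consumer transposes straight back. A toy restatement of that shape check (the Node type is hypothetical; the real pass works on kernel::LiteKernel):

#include <algorithm>
#include <vector>

struct Node {
  bool is_nchw2nhwc = false;
  bool is_nhwc2nchw = false;
  std::vector<Node *> in, out;
};

bool CanFuse(const Node &n) {
  bool pre = std::all_of(n.in.begin(), n.in.end(), [](const Node *k) {
    return k->is_nchw2nhwc && k->out.size() == 1;  // producer is NCHW->NHWC feeding only n
  });
  bool post = std::all_of(n.out.begin(), n.out.end(), [](const Node *k) {
    return k->is_nhwc2nchw;  // consumer converts straight back to NCHW
  });
  return pre && post;
}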
@@ -60,6 +65,10 @@ void NPUFusionPass::RemoveAndFreeKernel(kernel::LiteKernel *cur_kernel) {
void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) {
for (auto in_kernel : cur_kernel->in_kernels()) {
// graph in kernel
if (in_kernel->in_kernels().empty()) {
continue;
}
auto pre_kernel = in_kernel->in_kernels()[0];
auto pre_out_kernels = pre_kernel->out_kernels();
@@ -85,6 +94,10 @@ void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) {
void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) {
for (auto out_kernel : cur_kernel->out_kernels()) {
// graph out kernel
if (out_kernel->out_kernels().empty()) {
continue;
}
auto post_kernel = out_kernel->out_kernels()[0];
auto post_in_kernels = post_kernel->in_kernels();
@@ -183,22 +196,13 @@ int NPUFusionPass::ConcatFusion(kernel::LiteKernel *kernel) {
int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
auto pre_kernel = kernel->in_kernels()[0];
auto in_tensor = kernel->in_tensors()[0];
auto out_tensor = kernel->out_tensors()[0];
auto tensor_itr = std::find(pre_kernel->out_tensors().begin(), pre_kernel->out_tensors().end(), in_tensor);
if (tensor_itr != pre_kernel->out_tensors().end()) {
in_tensor = *tensor_itr;
} else {
MS_LOG(ERROR) << "Can't find the connneted tensor between kernel " << kernel->name() << " and it's pre_kernel.";
return RET_ERROR;
}
std::vector<kernel::LiteKernel *> pre_insert_kernels;
for (const auto &trans_kernel : kernel->out_kernels()) {
for (const auto &post_kernel : trans_kernel->out_kernels()) {
// update tensor
auto tensors_vec = post_kernel->in_tensors();
for (size_t i = 0; i < tensors_vec.size(); i++) {
if (tensors_vec[i] == out_tensor) {
if (tensors_vec[i] == trans_kernel->out_tensors()[0]) {
tensors_vec[i] = in_tensor;
break;
}
@@ -218,10 +222,7 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
RemoveAndFreeKernel(trans_kernel);
}
}
auto pre_out_kernels = pre_kernel->out_kernels();
auto itr = find(pre_out_kernels.begin(), pre_out_kernels.end(), kernel);
pre_out_kernels.insert(itr, pre_insert_kernels.begin(), pre_insert_kernels.end());
pre_kernel->set_in_kernels(pre_out_kernels);
pre_kernel->set_out_kernels(pre_insert_kernels);
RemoveAndFreeKernel(kernel);
return RET_OK;
}
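The net effect of FormatFusion is to splice a back-to-back Nhwc2Nchw/Nchw2Nhwc pair out of the graph, reconnecting the producer directly to each consumer before both transposes are freed. A toy illustration of the rewiring on a simplified node type (hypothetical; the real pass also rewrites the tensor lists, as shown above):

#include <algorithm>
#include <vector>

struct Node {
  std::vector<Node *> in, out;
};

// pre -> nh2nc -> nc2nh -> post   becomes   pre -> post
void SpliceTransposePair(Node *pre, Node *nh2nc, Node *nc2nh, Node *post) {
  std::replace(pre->out.begin(), pre->out.end(), nh2nc, post);  // pre now feeds post directly
  std::replace(post->in.begin(), post->in.end(), nc2nh, pre);   // post now reads from pre
}

After the splice both transpose kernels are unreachable, which is what RemoveAndFreeKernel cleans up in the real pass.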
@@ -229,7 +230,8 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
int NPUFusionPass::Run() {
for (size_t i = 0; i < kernels->size(); i++) {
auto kernel = (*kernels)[i];
if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc || kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) {
if (NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel)) ||
NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel))) {
if (CheckFormatFusion(kernel)) {
i--;
FormatFusion(kernel);
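Note the i-- ahead of FormatFusion: fusion erases entries from *kernels, so the index is stepped back to re-scan the slot that the next kernel slides into. The idiom in isolation (a sketch, with an arbitrary stand-in predicate):

#include <vector>

void EraseAndRescan(std::vector<int> *v) {
  for (size_t i = 0; i < v->size(); i++) {
    if ((*v)[i] % 2 == 0) {      // stand-in for "kernel was fused away"
      v->erase(v->begin() + i);  // container shrinks at position i
      i--;                       // re-examine the element that slid into slot i
    }
  }
}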

@@ -30,11 +30,13 @@ int GetInsertState(kernel::LiteKernel *kernel) {
return InsertNone;
}
auto pre_flag =
std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(),
[](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc; });
std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *kernel) {
return NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel));
});
auto post_flag =
std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
[](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; });
std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
return NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel));
});
if (pre_flag && !post_flag) {
return PostInsert;
}
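Only the PostInsert branch is visible in this hunk; restated compactly with its assumed mirror case (enum values are taken from the surrounding file; any further states are not shown here):

enum InsertState { InsertNone, PreInsert, PostInsert };

InsertState Decide(bool all_inputs_nchw2nhwc, bool all_outputs_nhwc2nchw) {
  if (all_inputs_nchw2nhwc && !all_outputs_nhwc2nchw) return PostInsert;  // the branch above
  if (!all_inputs_nchw2nhwc && all_outputs_nhwc2nchw) return PreInsert;   // assumed mirror case
  return InsertNone;
}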
@@ -48,7 +50,7 @@ int NPUInsertTransformPass::InsertPreNode(const InnerContext *context, kernel::L
std::vector<kernel::LiteKernel *> *trans_kernels,
std::vector<Tensor *> *all_tensors) {
for (auto in_kernel : kernel->in_kernels()) {
if (in_kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) {
if (NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(in_kernel))) {
continue;
}
auto nhwc_shape = in_kernel->out_tensors()[0]->shape();
@@ -86,7 +88,7 @@ int NPUInsertTransformPass::InsertPostNode(const InnerContext *context, kernel::
std::vector<kernel::LiteKernel *> *trans_kernels,
std::vector<Tensor *> *all_tensors) {
for (auto out_kernel : kernel->out_kernels()) {
if (out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) {
if (NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(out_kernel))) {
continue;
}
auto nhwc_shape = kernel->out_tensors()[0]->shape();

@@ -14,17 +14,19 @@
* limitations under the License.
*/
#include "src/kernel_registry.h"
#include "src/ops/nhwc2nchw.h"
#include "src/ops/nchw2nhwc.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
#include "src/ops/transpose.h"
#include "nnacl/transpose.h"
#include "src/ops/populate/populate_register.h"
#include "src/runtime/kernel/arm/fp32/transpose_fp32.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
PrimitiveC *NPUPassUtils::CreateTransposePrimitive() {
flatbuffers::FlatBufferBuilder fbb(1024);
auto val_offset = schema::CreateNchw2Nhwc(fbb);
auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nchw2Nhwc, val_offset.o);
auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Transpose, val_offset.o);
fbb.Finish(prim_offset);
auto buf = fbb.GetBufferPointer();
if (buf == nullptr) {
@@ -39,56 +41,72 @@ PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
return nullptr;
}
memcpy(primitive_buf, buf, fbb.GetSize());
auto *primitive = PrimitiveC::NewPrimitiveC<Nchw2Nhwc>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
auto *primitive = PrimitiveC::NewPrimitiveC<Transpose>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
free(primitive_buf);
fbb.Clear();
return primitive;
}
PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() {
flatbuffers::FlatBufferBuilder fbb(1024);
auto val_offset = schema::CreateNhwc2Nchw(fbb);
auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nhwc2Nchw, val_offset.o);
fbb.Finish(prim_offset);
auto buf = fbb.GetBufferPointer();
if (buf == nullptr) {
MS_LOG(ERROR) << "GetBufferPointer return nullptr";
fbb.Clear();
kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
const std::vector<Tensor *> &out_tensors,
const InnerContext *ctx, const std::string &name) {
kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Transpose};
auto nchw2nhwc_primitive = CreateTransposePrimitive();
auto *transpose_param = reinterpret_cast<TransposeParameter *>(malloc(sizeof(TransposeParameter)));
if (transpose_param == nullptr) {
MS_LOG(ERROR) << "malloc TransposeParameter failed.";
return nullptr;
}
auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
if (primitive_buf == nullptr) {
MS_LOG(ERROR) << "Malloc primitive buffer failed.";
fbb.Clear();
memset(transpose_param, 0, sizeof(TransposeParameter));
transpose_param->op_parameter_.type_ = nchw2nhwc_primitive->Type();
transpose_param->perm_[0] = 0;
transpose_param->perm_[1] = 2;
transpose_param->perm_[2] = 3;
transpose_param->perm_[3] = 1;
transpose_param->num_axes_ = 4;
auto kernel = new (std::nothrow) kernel::TransposeCPUKernel(reinterpret_cast<OpParameter *>(transpose_param),
in_tensors, out_tensors, ctx, nchw2nhwc_primitive);
if (kernel != nullptr) {
kernel->set_desc(key);
} else {
MS_LOG(ERROR) << "New Nchw2Nhwc Kernel failed.";
return nullptr;
}
memcpy(primitive_buf, buf, fbb.GetSize());
auto *primitive = PrimitiveC::NewPrimitiveC<Nhwc2Nchw>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
free(primitive_buf);
fbb.Clear();
return primitive;
}
kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
const std::vector<Tensor *> &out_tensors,
const InnerContext *ctx, const std::string &name) {
kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nchw2Nhwc};
auto nchw2nhwc_primitive = CreateNchw2NhwcPrimitive();
auto *nchw2nhwc_kernel =
KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nchw2nhwc_primitive, ctx, key);
nchw2nhwc_kernel->set_name(name);
return nchw2nhwc_kernel;
kernel->set_name(name);
return kernel;
}
kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
const std::vector<Tensor *> &out_tensors,
const InnerContext *ctx, const std::string &name) {
kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nhwc2Nchw};
auto nhwc2nchw_primitive = CreateNhwc2NchwPrimitive();
auto *nhwc2nchw_kernel =
KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nhwc2nchw_primitive, ctx, key);
nhwc2nchw_kernel->set_name(name);
return nhwc2nchw_kernel;
kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Transpose};
auto nhwc2nchw_primitive = CreateTransposePrimitive();
auto *transpose_param = reinterpret_cast<TransposeParameter *>(malloc(sizeof(TransposeParameter)));
if (transpose_param == nullptr) {
MS_LOG(ERROR) << "malloc TransposeParameter failed.";
return nullptr;
}
memset(transpose_param, 0, sizeof(TransposeParameter));
transpose_param->op_parameter_.type_ = nhwc2nchw_primitive->Type();
transpose_param->perm_[0] = 0;
transpose_param->perm_[1] = 3;
transpose_param->perm_[2] = 1;
transpose_param->perm_[3] = 2;
transpose_param->num_axes_ = 4;
auto kernel = new (std::nothrow) kernel::TransposeCPUKernel(reinterpret_cast<OpParameter *>(transpose_param),
in_tensors, out_tensors, ctx, nhwc2nchw_primitive);
if (kernel != nullptr) {
kernel->set_desc(key);
} else {
MS_LOG(ERROR) << "New Nhwc2Nchw Kernel failed.";
return nullptr;
}
kernel->set_name(name);
return kernel;
}
void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
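The two permutations hard-coded above are the whole story of the layout swap: {0, 2, 3, 1} turns an NCHW shape into NHWC, and {0, 3, 1, 2} inverts it. A quick self-contained check (a sketch, not library code):

#include <array>
#include <cassert>

std::array<int, 4> ApplyPerm(const std::array<int, 4> &shape, const std::array<int, 4> &perm) {
  std::array<int, 4> out{};
  for (int i = 0; i < 4; ++i) out[i] = shape[perm[i]];
  return out;
}

int main() {
  std::array<int, 4> nchw{1, 3, 224, 224};
  assert((ApplyPerm(nchw, {0, 2, 3, 1}) == std::array<int, 4>{1, 224, 224, 3}));  // NCHW -> NHWC
  std::array<int, 4> nhwc{1, 224, 224, 3};
  assert((ApplyPerm(nhwc, {0, 3, 1, 2}) == std::array<int, 4>{1, 3, 224, 224}));  // NHWC -> NCHW
  return 0;
}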
@@ -173,4 +191,39 @@ void NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, k
post_kernel->set_in_kernels(post_in_kernels);
post_kernel->set_in_tensors({post_in_tensors});
}
bool NPUPassUtils::IsNhwc2Nchw(kernel::LiteKernel *kernel) {
if (kernel->Type() != schema::PrimitiveType_Transpose) {
return false;
}
auto parameter = reinterpret_cast<TransposeParameter *>(kernel->op_parameter());
if (parameter->num_axes_ != 4) {
return false;
}
std::vector<int> perm = {parameter->perm_[0], parameter->perm_[1], parameter->perm_[2], parameter->perm_[3]};
std::vector<int> nh2nc_perm = {0, 3, 1, 2};
if (nh2nc_perm == perm) {
return true;
}
return false;
}
bool NPUPassUtils::IsNchw2Nhwc(kernel::LiteKernel *kernel) {
if (kernel->Type() != schema::PrimitiveType_Transpose) {
return false;
}
auto parameter = reinterpret_cast<TransposeParameter *>(kernel->op_parameter());
if (parameter->num_axes_ != 4) {
return false;
}
std::vector<int> perm = {parameter->perm_[0], parameter->perm_[1], parameter->perm_[2], parameter->perm_[3]};
std::vector<int> nh2nc_perm = {0, 2, 3, 1};
if (nh2nc_perm == perm) {
return true;
}
return false;
}
} // namespace mindspore::lite

@@ -47,10 +47,12 @@ class NPUPassUtils {
static void UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *post_kernel);
private:
static PrimitiveC *CreateNchw2NhwcPrimitive();
static bool IsNhwc2Nchw(kernel::LiteKernel *kernel);
static bool IsNchw2Nhwc(kernel::LiteKernel *kernel);
static PrimitiveC *CreateNhwc2NchwPrimitive();
private:
static PrimitiveC *CreateTransposePrimitive();
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_

@@ -103,7 +103,6 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
// input come from npu
auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel)->GetNPUOp();
if (npu_op != nullptr) {
npu_op->GetOutputDesc(0).GetName();
node_input_op.push_back(npu_op);
is_weight_tensor = false;
break;
@@ -168,14 +167,13 @@ std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + ".om"; }
int SubGraphNpuKernel::Init() {
if (!is_compiled_) {
name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());
auto model_buffer_data = BuildIRModel();
if (model_buffer_data == nullptr) {
MS_LOG(ERROR) << "Build IR model failed.";
return RET_ERROR;
}
name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());
mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data, GetOMModelName(),
context_->GetNpuInfo().frequency_);

@@ -36,7 +36,7 @@ int CastNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
}
op_->set_input_x(*npu_inputs[0]);
op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->dst_type_)));
op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->src_type_)));
op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(inputs[0]->data_type())));
return RET_OK;
}

@@ -1,3 +1,3 @@
mobilenet_v1_1.0_224.tflite 1.5
squeezenet.tflite 1.5
inception_v3.tflite 0.5
mobilenet_v1_1.0_224.tflite 2.5
squeezenet.tflite 2.5
inception_v3.tflite 1
