parent
a3d4dded12
commit
d45b5b5126
@ -0,0 +1,224 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/agent/npu/npu_fusion_pass.h"
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "nnacl/concat_parameter.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
// Returns true when every input of |kernel| is a Nchw2Nhwc op with a single
// consumer and every output is a Nhwc2Nchw op with a single producer, i.e.
// the transpose pair surrounding |kernel| can be folded away.
//
// Fix: the lambda parameters previously shadowed the outer |kernel|
// (bugprone-shadow); renamed for clarity, logic unchanged.
bool CheckFusion(kernel::LiteKernel *kernel) {
  // NOTE(review): std::all_of over an empty range returns true, so a kernel
  // with no input kernels passes the pre-check — confirm this is intended
  // for graph-input kernels.
  auto pre_flag =
    std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *in_kernel) {
      return in_kernel->Type() == schema::PrimitiveType_Nchw2Nhwc && in_kernel->out_kernels().size() == 1;
    });
  if (!pre_flag) {
    return false;
  }
  auto post_flag =
    std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *out_kernel) {
      return out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw && out_kernel->in_kernels().size() == 1;
    });
  return post_flag;
}
|
||||
|
||||
// Detaches the Nchw2Nhwc input kernels of |cur_kernel| from the graph: for
// each in_kernel (a transpose being fused away), the transpose's own producer
// is wired directly to |cur_kernel| and the transpose is erased from the
// pass's kernel list.
void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) {
  for (auto in_kernel : cur_kernel->in_kernels()) {
    // CheckFusion guarantees each in_kernel has exactly one consumer; assumes
    // it also has at least one producer — TODO confirm for graph-input kernels.
    auto pre_kernel = in_kernel->in_kernels()[0];

    // Re-point the producer's output edge from the transpose to cur_kernel.
    auto pre_out_kernels = pre_kernel->out_kernels();
    for (size_t i = 0; i < pre_out_kernels.size(); i++) {
      if (pre_out_kernels[i] == in_kernel) {
        pre_out_kernels[i] = cur_kernel;
        break;
      }
    }
    pre_kernel->set_out_kernels(pre_out_kernels);

    // Re-point cur_kernel's input edge from the transpose to the producer.
    // NOTE(review): set_in_kernels mutates the container the outer range-for
    // iterates — safe only if in_kernels() returns by value; verify against
    // LiteKernel's accessor.
    auto cur_in_kernels = cur_kernel->in_kernels();
    for (size_t i = 0; i < cur_in_kernels.size(); i++) {
      if (cur_in_kernels[i] == in_kernel) {
        cur_in_kernels[i] = pre_kernel;
        break;
      }
    }
    cur_kernel->set_in_kernels(cur_in_kernels);
    // Remove the fused transpose from the kernel list owned by the pass.
    kernels->erase(find(kernels->begin(), kernels->end(), in_kernel));
  }
}
|
||||
|
||||
// Detaches the Nhwc2Nchw output kernels of |cur_kernel| from the graph: for
// each out_kernel (a transpose being fused away), the transpose's own
// consumer is wired directly to |cur_kernel| and the transpose is erased from
// the pass's kernel list. Mirror image of UpdatePreKernels.
void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) {
  for (auto out_kernel : cur_kernel->out_kernels()) {
    // CheckFusion guarantees each out_kernel has exactly one producer; assumes
    // it also has at least one consumer — TODO confirm for graph-output kernels.
    auto post_kernel = out_kernel->out_kernels()[0];

    // Re-point the consumer's input edge from the transpose to cur_kernel.
    auto post_in_kernels = post_kernel->in_kernels();
    for (size_t i = 0; i < post_in_kernels.size(); i++) {
      if (post_in_kernels[i] == out_kernel) {
        post_in_kernels[i] = cur_kernel;
        break;
      }
    }
    post_kernel->set_in_kernels(post_in_kernels);

    // Re-point cur_kernel's output edge from the transpose to the consumer.
    // NOTE(review): set_out_kernels mutates the container the outer range-for
    // iterates — safe only if out_kernels() returns by value; verify.
    auto cur_out_kernels = cur_kernel->out_kernels();
    for (size_t i = 0; i < cur_out_kernels.size(); i++) {
      if (cur_out_kernels[i] == out_kernel) {
        cur_out_kernels[i] = post_kernel;
        break;
      }
    }
    cur_kernel->set_out_kernels(cur_out_kernels);
    // Remove the fused transpose from the kernel list owned by the pass.
    kernels->erase(find(kernels->begin(), kernels->end(), out_kernel));
  }
}
|
||||
|
||||
// Rewrites cur_kernel's input tensor list so it references the tensors
// produced before the (about to be removed) Nchw2Nhwc input transposes: each
// transpose output tensor in the list is replaced by the matching output
// tensor of that transpose's producer.
void UpdatePreTensors(kernel::LiteKernel *cur_kernel) {
  auto tensors_vec = cur_kernel->in_tensors();
  for (auto in_kernel : cur_kernel->in_kernels()) {
    lite::Tensor *cur_tensor = nullptr;
    auto in_tensor = in_kernel->in_tensors()[0];
    auto out_tensor = in_kernel->out_tensors()[0];
    // Assumes the transpose has a producer kernel — TODO confirm for graph
    // inputs (CheckFusion does not verify this).
    auto pre_kernel = in_kernel->in_kernels()[0];
    // Locate the producer output tensor that feeds the transpose.
    for (size_t i = 0; i < pre_kernel->out_tensors().size(); i++) {
      if (pre_kernel->out_tensors()[i] == in_tensor) {
        cur_tensor = pre_kernel->out_tensors()[i];
      }
    }
    // Substitute the transpose's output tensor with the producer's tensor.
    // NOTE(review): if the search above found no match, cur_tensor is still
    // nullptr and a null tensor would be written here — verify the match is
    // guaranteed by graph construction.
    for (size_t i = 0; i < tensors_vec.size(); i++) {
      if (tensors_vec[i] == out_tensor) {
        tensors_vec[i] = cur_tensor;
      }
    }
  }
  cur_kernel->set_in_tensors(tensors_vec);
}
|
||||
|
||||
// Rewrites cur_kernel's output tensor list so it references the tensors
// consumed after the (about to be removed) Nhwc2Nchw output transposes: each
// transpose input tensor in the list is replaced by the matching input tensor
// of that transpose's consumer. Mirror image of UpdatePreTensors.
void UpdatePostTensors(kernel::LiteKernel *cur_kernel) {
  auto tensors_vec = cur_kernel->out_tensors();
  for (auto out_kernel : cur_kernel->out_kernels()) {
    auto in_tensor = out_kernel->in_tensors()[0];
    auto out_tensor = out_kernel->out_tensors()[0];
    // Assumes the transpose has a consumer kernel — TODO confirm for graph
    // outputs (CheckFusion does not verify this).
    auto post_kernel = out_kernel->out_kernels()[0];
    lite::Tensor *cur_tensor = nullptr;
    // Locate the consumer input tensor fed by the transpose.
    for (size_t i = 0; i < post_kernel->in_tensors().size(); i++) {
      if (post_kernel->in_tensors()[i] == out_tensor) {
        cur_tensor = post_kernel->in_tensors()[i];
      }
    }
    // Substitute the transpose's input tensor with the consumer's tensor.
    // NOTE(review): cur_tensor stays nullptr if no match — verify guaranteed.
    for (size_t i = 0; i < tensors_vec.size(); i++) {
      if (tensors_vec[i] == in_tensor) {
        tensors_vec[i] = cur_tensor;
      }
    }
  }
  cur_kernel->set_out_tensors(tensors_vec);
}
|
||||
|
||||
// Remaps an axis index from NHWC order to NCHW order after a layout fusion:
// N stays 0, H (1) -> 2, W (2) -> 3, C (3 or the -1 shorthand) -> 1.
// Any other value is out of range and maps to the sentinel -2.
int TransFormAxis(int axis) {
  if (axis == 0) {
    return 0;
  }
  if (axis == 1) {
    return 2;
  }
  if (axis == 2) {
    return 3;
  }
  if (axis == 3 || axis == -1) {
    return 1;
  }
  return -2;
}
|
||||
|
||||
// Folds the Nchw2Nhwc/Nhwc2Nchw transpose pairs surrounding an Add kernel.
// A no-op (still returning RET_OK) when the fusion pattern does not match.
int NPUFusionPass::AddFusion(kernel::LiteKernel *kernel) {
  if (CheckFusion(kernel)) {
    // Rewire tensors first, then kernel edges, so the tensor lookups still
    // see the original transpose kernels.
    UpdatePreTensors(kernel);
    UpdatePostTensors(kernel);
    UpdatePreKernels(kernel);
    UpdatePostKernels(kernel);
  }
  return RET_OK;
}
|
||||
|
||||
// Folds the transpose pairs surrounding a Concat kernel, then remaps its
// concat axis from NHWC to NCHW order since the fused kernel now sees NCHW
// data. A no-op (still returning RET_OK) when the pattern does not match.
int NPUFusionPass::ConcatFusion(kernel::LiteKernel *kernel) {
  if (CheckFusion(kernel)) {
    UpdatePreTensors(kernel);
    UpdatePostTensors(kernel);
    UpdatePreKernels(kernel);
    UpdatePostKernels(kernel);
    // The stored axis was expressed in NHWC; translate it to NCHW.
    auto *concat_param = reinterpret_cast<ConcatParameter *>(kernel->op_parameter());
    concat_param->axis_ = TransFormAxis(concat_param->axis_);
  }
  return RET_OK;
}
|
||||
|
||||
// Removes a Nchw2Nhwc kernel whose consumers are all Nhwc2Nchw kernels: the
// round trip NCHW -> NHWC -> NCHW is a no-op, so both transposes are dropped
// and the producer is wired straight to the downstream kernels.
int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
  if (kernel->out_kernels().empty()) {
    return RET_OK;
  }
  // Fuse only when every consumer undoes the format change.
  if (!std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
        return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw;
      })) {
    return RET_OK;
  }
  // Assumes the Nchw2Nhwc kernel has a producer — TODO confirm for graph inputs.
  auto pre_kernel = kernel->in_kernels()[0];

  // Unlink this kernel from its producer's consumer list.
  auto pre_out_kernels = pre_kernel->out_kernels();
  for (size_t i = 0; i < pre_out_kernels.size(); i++) {
    if (pre_out_kernels[i] == kernel) {
      pre_out_kernels.erase(pre_out_kernels.begin() + i);
      break;
    }
  }
  // For every Nhwc2Nchw consumer, connect its own consumers directly to the
  // producer, then drop the transpose from the pass's kernel list.
  for (const auto &nc2nh : kernel->out_kernels()) {
    for (const auto &post_kernel : nc2nh->out_kernels()) {
      auto post_in_kernels = post_kernel->in_kernels();
      for (size_t i = 0; i < post_in_kernels.size(); i++) {
        if (post_in_kernels[i] == nc2nh) {
          post_in_kernels[i] = pre_kernel;
          break;
        }
      }
      post_kernel->set_in_kernels(post_in_kernels);
      pre_out_kernels.push_back(post_kernel);
    }
    kernels->erase(find(kernels->begin(), kernels->end(), nc2nh));
  }
  pre_kernel->set_out_kernels(pre_out_kernels);
  // Finally remove the Nchw2Nhwc kernel itself.
  // NOTE(review): unlike Add/Concat fusion, tensor edges are not rewritten
  // here — confirm downstream kernels already reference pre_kernel's tensors.
  kernels->erase(find(kernels->begin(), kernels->end(), kernel));
  return RET_OK;
}
|
||||
|
||||
int NPUFusionPass::Fusion() {
|
||||
for (auto kernel : *kernels) {
|
||||
switch (kernel->Type()) {
|
||||
case schema::PrimitiveType_Concat:
|
||||
ConcatFusion(kernel);
|
||||
continue;
|
||||
case schema::PrimitiveType_Add:
|
||||
AddFusion(kernel);
|
||||
continue;
|
||||
case schema::PrimitiveType_Nchw2Nhwc:
|
||||
FormatFusion(kernel);
|
||||
continue;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::lite
|
@ -0,0 +1,40 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "src/ops/primitive_c.h"
|
||||
namespace mindspore::lite {
|
||||
// Graph pass that removes redundant Nchw2Nhwc/Nhwc2Nchw transpose pairs
// around NPU-executed kernels and fixes up the affected edges and attributes.
class NPUFusionPass {
 public:
  // Does not take ownership of dst_kernels; the vector must outlive the pass.
  explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) { kernels = dst_kernels; }
  ~NPUFusionPass() = default;
  // Applies all supported fusions to the kernel list in place. Returns RET_OK.
  int Fusion();

 protected:
  // Fuses transpose pairs around a Concat kernel and remaps its axis.
  int ConcatFusion(kernel::LiteKernel *kernel);
  // Fuses transpose pairs around an Add kernel.
  int AddFusion(kernel::LiteKernel *kernel);
  // Removes Nchw2Nhwc -> Nhwc2Nchw format round trips.
  int FormatFusion(kernel::LiteKernel *kernel);
  // Edge fix-ups used by the fusions above.
  void UpdatePreKernels(kernel::LiteKernel *kernel);
  void UpdatePostKernels(kernel::LiteKernel *kernel);

 private:
  // Borrowed pointer to the kernel list being rewritten.
  std::vector<kernel::LiteKernel *> *kernels;
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
|
@ -0,0 +1,102 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/kernel_registry.h"
|
||||
#include "src/ops/nhwc2nchw.h"
|
||||
#include "src/ops/nchw2nhwc.h"
|
||||
#include "src/runtime/agent/npu/npu_pass_utils.h"
|
||||
namespace mindspore::lite {
|
||||
using kernel::KERNEL_ARCH::kCPU;
|
||||
using kernel::KERNEL_ARCH::kNPU;
|
||||
// Serializes a Nchw2Nhwc primitive through flatbuffers and deserializes it
// into a heap-allocated PrimitiveC. Returns nullptr on allocation failure.
PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
  flatbuffers::FlatBufferBuilder fbb(1024);
  auto val_offset = schema::CreateNchw2Nhwc(fbb);
  auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nchw2Nhwc, val_offset.o);
  fbb.Finish(prim_offset);
  auto buf = fbb.GetBufferPointer();
  if (buf == nullptr) {
    MS_LOG(ERROR) << "GetBufferPointer return nullptr";
    fbb.Clear();
    return nullptr;
  }
  // Copy the finished buffer out of the builder before reading the root.
  auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
  if (primitive_buf == nullptr) {
    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
    fbb.Clear();
    return nullptr;
  }
  memcpy(primitive_buf, buf, fbb.GetSize());
  // NOTE(review): freeing primitive_buf right after assumes NewPrimitiveC
  // copies everything it needs and keeps no pointer into the buffer — confirm.
  auto *primitive = PrimitiveC::NewPrimitiveC<Nchw2Nhwc>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
  free(primitive_buf);
  fbb.Clear();
  return primitive;
}
|
||||
|
||||
// Serializes a Nhwc2Nchw primitive through flatbuffers and deserializes it
// into a heap-allocated PrimitiveC. Returns nullptr on allocation failure.
// Mirror image of CreateNchw2NhwcPrimitive.
PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() {
  flatbuffers::FlatBufferBuilder fbb(1024);
  auto val_offset = schema::CreateNhwc2Nchw(fbb);
  auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nhwc2Nchw, val_offset.o);
  fbb.Finish(prim_offset);
  auto buf = fbb.GetBufferPointer();
  if (buf == nullptr) {
    MS_LOG(ERROR) << "GetBufferPointer return nullptr";
    fbb.Clear();
    return nullptr;
  }
  // Copy the finished buffer out of the builder before reading the root.
  auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
  if (primitive_buf == nullptr) {
    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
    fbb.Clear();
    return nullptr;
  }
  memcpy(primitive_buf, buf, fbb.GetSize());
  // NOTE(review): freeing primitive_buf right after assumes NewPrimitiveC
  // copies everything it needs and keeps no pointer into the buffer — confirm.
  auto *primitive = PrimitiveC::NewPrimitiveC<Nhwc2Nchw>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
  free(primitive_buf);
  fbb.Clear();
  return primitive;
}
|
||||
|
||||
// Creates a CPU Nchw2Nhwc transpose kernel named |name|.
//
// Fix: CreateNchw2NhwcPrimitive documents a nullptr return on allocation
// failure and GetKernel may also fail, but the original dereferenced both
// results unchecked. Null is now propagated to the caller.
kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
                                                        const std::vector<Tensor *> &out_tensors,
                                                        const InnerContext *ctx, const std::string &name) {
  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nchw2Nhwc};
  auto nchw2nhwc_primitive = CreateNchw2NhwcPrimitive();
  if (nchw2nhwc_primitive == nullptr) {
    MS_LOG(ERROR) << "Create Nchw2Nhwc primitive failed.";
    return nullptr;
  }
  auto *nchw2nhwc_kernel =
    KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nchw2nhwc_primitive, ctx, key);
  if (nchw2nhwc_kernel == nullptr) {
    MS_LOG(ERROR) << "Get Nchw2Nhwc kernel failed.";
    return nullptr;
  }
  nchw2nhwc_kernel->set_name(name);
  return nchw2nhwc_kernel;
}
|
||||
|
||||
// Creates a CPU Nhwc2Nchw transpose kernel named |name|.
//
// Fix: CreateNhwc2NchwPrimitive documents a nullptr return on allocation
// failure and GetKernel may also fail, but the original dereferenced both
// results unchecked. Null is now propagated to the caller.
kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
                                                        const std::vector<Tensor *> &out_tensors,
                                                        const InnerContext *ctx, const std::string &name) {
  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nhwc2Nchw};
  auto nhwc2nchw_primitive = CreateNhwc2NchwPrimitive();
  if (nhwc2nchw_primitive == nullptr) {
    MS_LOG(ERROR) << "Create Nhwc2Nchw primitive failed.";
    return nullptr;
  }
  auto *nhwc2nchw_kernel =
    KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nhwc2nchw_primitive, ctx, key);
  if (nhwc2nchw_kernel == nullptr) {
    MS_LOG(ERROR) << "Get Nhwc2Nchw kernel failed.";
    return nullptr;
  }
  nhwc2nchw_kernel->set_name(name);
  return nhwc2nchw_kernel;
}
|
||||
|
||||
// Rebinds a kernel's graph edges and tensor lists in one call. Each list is
// assigned independently through its own setter.
void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
                                const std::vector<kernel::LiteKernel *> &out_kernels,
                                const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors) {
  kernel->set_in_kernels(in_kernels);
  kernel->set_out_kernels(out_kernels);
  kernel->set_in_tensors(in_tensors);
  kernel->set_out_tensors(out_tensors);
}
|
||||
} // namespace mindspore::lite
|
@ -0,0 +1,44 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "src/ops/primitive_c.h"
|
||||
#include "src/lite_kernel.h"
|
||||
namespace mindspore::lite {
|
||||
// Stateless helpers shared by the NPU graph passes for creating format
// transpose kernels and rebinding kernel graph edges.
class NPUPassUtils {
 public:
  // Creates a CPU Nchw2Nhwc transpose kernel named |name|.
  static kernel::LiteKernel *CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
                                                   const std::string &name);

  // Creates a CPU Nhwc2Nchw transpose kernel named |name|.
  static kernel::LiteKernel *CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
                                                   const std::string &name);

  // Rebinds a kernel's in/out kernel edges and in/out tensor lists.
  static void UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
                           const std::vector<kernel::LiteKernel *> &out_kernels,
                           const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors);

 private:
  // Builds a flatbuffer-backed Nchw2Nhwc primitive; nullptr on failure.
  static PrimitiveC *CreateNchw2NhwcPrimitive();

  // Builds a flatbuffer-backed Nhwc2Nchw primitive; nullptr on failure.
  static PrimitiveC *CreateNhwc2NchwPrimitive();
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
|
@ -0,0 +1,201 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/agent/npu/npu_transform_pass.h"
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "src/runtime/agent/npu/npu_manager.h"
|
||||
#include "src/runtime/agent/npu/npu_pass_utils.h"
|
||||
namespace mindspore::lite {
|
||||
using kernel::KERNEL_ARCH::kCPU;
|
||||
using kernel::KERNEL_ARCH::kNPU;
|
||||
// Redirects |kernel|'s consumer edges that pointed at |after_kernel| so they
// point at the newly inserted |trans_kernel| instead; tensors are untouched.
int NPUTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                    kernel::LiteKernel *after_kernel) {
  // Substitute in place: edges to after_kernel become edges to the transpose;
  // every other edge keeps its position.
  std::vector<kernel::LiteKernel *> out_kernels = kernel->out_kernels();
  for (size_t i = 0; i < out_kernels.size(); ++i) {
    if (out_kernels[i] == after_kernel) {
      out_kernels[i] = trans_kernel;
    }
  }
  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors());
  return RET_OK;
}
|
||||
|
||||
// Rewires |kernel| after a Nhwc2Nchw transpose has been inserted in front of
// it: input tensor 0 and the producer edge from |before_kernel| are
// redirected through |trans_kernel|.
//
// Fix: the input-kernel filter compared against |kernel| itself
// (`in_kernel != kernel`), which is always true — a kernel is never its own
// input — so |before_kernel| was kept in the list alongside the prepended
// transpose, leaving a stale duplicate edge. The otherwise-unused
// |before_kernel| parameter shows the intended comparison. Loop indices are
// now size_t to match size().
int NPUTransformPass::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                      kernel::LiteKernel *before_kernel) {
  // Input tensor 0 now comes from the transpose; the rest are kept as-is.
  std::vector<lite::Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]};
  for (size_t i = 1; i < kernel->in_tensors().size(); i++) {
    cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]);
  }
  // The transpose replaces before_kernel as a producer of |kernel|.
  std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel};
  for (size_t i = 0; i < kernel->in_kernels().size(); i++) {
    auto in_kernel = kernel->in_kernels()[i];
    if (in_kernel != before_kernel) {
      cur_in_kernels.push_back(in_kernel);
    }
  }
  NPUPassUtils::UpdateKernel(kernel, cur_in_kernels, kernel->out_kernels(), cur_kernel_in_tensors,
                             kernel->out_tensors());
  return RET_OK;
}
|
||||
|
||||
// Inserts a Nhwc2Nchw transpose in front of *it when the kernel needs NCHW
// input but its producer does not already deliver NCHW (producer is absent,
// not on NPU, or not one of npu_trans_nodes).
int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                                    std::vector<kernel::LiteKernel *> *all_kernels,
                                    std::vector<Tensor *> *all_tensors) {
  auto kernel = *it;
  bool is_input_kernel = kernel->in_kernels().empty();
  if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU ||
      npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) {
    kernel::LiteKernel *before_kernel = nullptr;
    if (!is_input_kernel) {
      before_kernel = kernel->in_kernels()[0];
    }
    // Create pre transform kernel out tensors.
    // NHWC (N,H,W,C) -> NCHW (N,C,H,W); assumes input tensor 0 is rank-4 NHWC
    // — TODO confirm callers guarantee this.
    std::vector<int> shapes{kernel->in_tensors()[0]->shape()[0], kernel->in_tensors()[0]->shape()[3],
                            kernel->in_tensors()[0]->shape()[1], kernel->in_tensors()[0]->shape()[2]};
    // NOTE(review): raw new; ownership presumably transfers to whoever owns
    // *all_tensors — verify who frees these tensors.
    auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), shapes, schema::Format_NCHW, Tensor::VAR);
    std::vector<Tensor *> pre_trans_out_tensors = {tensor};
    all_tensors->push_back(pre_trans_out_tensors[0]);
    // Replace the output tensor of the previous node; |total| keeps inserted
    // transpose names unique across the pass.
    auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++);
    auto *pre_trans_kernel =
      NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name);
    // Insert Nhwc2Nchw into the front of the current queue
    all_kernels->push_back(pre_trans_kernel);
    // Replace the output kernel of the previous node
    std::vector<kernel::LiteKernel *> pre_trans_in_kernel;
    if (is_input_kernel) {
      pre_trans_in_kernel = {};
    } else {
      pre_trans_in_kernel = {before_kernel};
    }
    NPUPassUtils::UpdateKernel(pre_trans_kernel, pre_trans_in_kernel, {kernel}, {kernel->in_tensors()[0]},
                               pre_trans_out_tensors);

    // Fix up the producer's and consumer's edges around the new transpose.
    if (before_kernel != nullptr) {
      UpdateNH2NCTransNodePreKernel(before_kernel, pre_trans_kernel, kernel);
    }
    UpdateNH2NCTransNodeAfterKernel(kernel, pre_trans_kernel, before_kernel);
  }
  return RET_OK;
}
|
||||
|
||||
// Inserts a Nchw2Nhwc transpose after *it for every consumer that cannot
// accept NCHW output (not on NPU, or not one of npu_trans_nodes).
int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                                     std::vector<kernel::LiteKernel *> *all_kernels,
                                     std::vector<Tensor *> *all_tensors) {
  auto kernel = *it;
  // Single output multiple references
  for (int i = 0; i < kernel->out_kernels().size(); i++) {
    auto next_kernel = kernel->out_kernels().at(i);
    // Consumers that themselves run NCHW on NPU need no conversion.
    if (next_kernel->desc().arch == kNPU && npu_trans_nodes.find(next_kernel->Type()) != npu_trans_nodes.end()) {
      continue;
    }
    // Change format the output of the current kernel nhwc->nchw
    // NOTE(review): the shape is copied unchanged and the tensor is tagged
    // NHWC, so the comment above (kept from the original) looks stale —
    // confirm the intended direction.
    auto shapes = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[1],
                   kernel->out_tensors()[0]->shape()[2], kernel->out_tensors()[0]->shape()[3]};
    // NOTE(review): raw new; ownership presumably transfers to whoever owns
    // *all_tensors — verify who frees these tensors.
    auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), shapes, schema::Format_NHWC, Tensor::VAR);
    std::vector<Tensor *> post_trans_out_tensors = {tensor};
    all_tensors->push_back(post_trans_out_tensors[0]);
    // Use the output tensor of the current node as the input tensor of the post-conversion operator
    // NOTE(review): name lacks the '_' before the counter that the pre-trans
    // name has ("_Nhwc2Nchw_") — confirm whether the asymmetry is intended.
    auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++);
    auto *post_trans_kernel =
      NPUPassUtils::CreateNchw2NhwcKernel(kernel->out_tensors(), post_trans_out_tensors, context, name);
    // Replace the input tensor of the next node
    NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {next_kernel}, kernel->out_tensors(),
                               post_trans_out_tensors);
    // Directly insert in the back, will not affect the topological sort
    all_kernels->push_back(post_trans_kernel);
    UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel);
    UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, next_kernel);
  }
  return RET_OK;
}
|
||||
|
||||
// After a Nchw2Nhwc transpose is inserted behind |kernel|: redirects the edge
// to |next_kernel| through |trans_kernel| and converts kernel's output tensor
// shape/format from NHWC to NCHW in place.
int NPUTransformPass::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                    kernel::LiteKernel *next_kernel) {
  std::vector<kernel::LiteKernel *> cur_out_kernels;
  for (auto out_kernel : kernel->out_kernels()) {
    if (out_kernel == next_kernel) {
      cur_out_kernels.push_back(trans_kernel);
    } else {
      cur_out_kernels.push_back(out_kernel);
    }
  }
  auto kernel_out_tensor = kernel->out_tensors()[0];
  // Change format the output of the current kernel nhwc->nchw
  // Assumes the tensor is rank-4 NHWC before this call — TODO confirm.
  std::vector<int> kernel_out_new_shapes = {kernel_out_tensor->shape()[0], kernel_out_tensor->shape()[3],
                                            kernel_out_tensor->shape()[1], kernel_out_tensor->shape()[2]};
  kernel_out_tensor->set_format(schema::Format_NCHW);
  kernel_out_tensor->set_shape(kernel_out_new_shapes);
  // NOTE(review): the shape mutation is in place, so a second call for
  // another consumer would permute the shape again — InsertPostNode calls
  // this once per non-NPU consumer; verify multi-consumer behavior.
  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), cur_out_kernels, kernel->in_tensors(), {kernel_out_tensor});
  return RET_OK;
}
|
||||
|
||||
// Rewires |next_kernel| after a Nchw2Nhwc transpose has been inserted between
// |kernel| and |next_kernel|: the tensor and producer edges that referenced
// |kernel| now reference |trans_kernel|.
//
// Fix: the original called next_kernel->set_in_tensors(next_in_tensors) and
// then immediately passed the same list to UpdateKernel, which sets it again
// — the redundant first call is dropped; the resulting state is identical.
int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                      kernel::LiteKernel *next_kernel) {
  // Replace kernel's output tensor with the transpose's output tensor.
  std::vector<Tensor *> next_in_tensors;
  for (auto next_in_tensor : next_kernel->in_tensors()) {
    if (next_in_tensor == kernel->out_tensors()[0]) {
      next_in_tensors.push_back(trans_kernel->out_tensors()[0]);
    } else {
      next_in_tensors.push_back(next_in_tensor);
    }
  }
  // Replace the producer edge from kernel with the transpose.
  std::vector<kernel::LiteKernel *> next_in_kernels;
  for (auto in_kernel : next_kernel->in_kernels()) {
    next_in_kernels.push_back(in_kernel == kernel ? trans_kernel : in_kernel);
  }
  NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors,
                             next_kernel->out_tensors());
  return RET_OK;
}
|
||||
|
||||
int NPUTransformPass::FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors) {
|
||||
if (context->IsNpuEnabled()) {
|
||||
std::vector<kernel::LiteKernel *> new_kernels;
|
||||
|
||||
for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
|
||||
auto kernel = *it;
|
||||
if (kernel->desc().arch != kNPU) {
|
||||
new_kernels.push_back(kernel);
|
||||
continue;
|
||||
}
|
||||
if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
|
||||
InsertPreNode(context, it, &new_kernels, all_tensors);
|
||||
new_kernels.push_back(kernel);
|
||||
InsertPostNode(context, it, &new_kernels, all_tensors);
|
||||
} else {
|
||||
new_kernels.push_back(kernel);
|
||||
}
|
||||
}
|
||||
all_kernels->clear();
|
||||
for (int i = 0; i < new_kernels.size(); i++) {
|
||||
all_kernels->push_back(new_kernels[i]);
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
} // namespace mindspore::lite
|
@ -0,0 +1,51 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "src/ops/primitive_c.h"
|
||||
namespace mindspore::lite {
|
||||
class NPUTransformPass {
|
||||
public:
|
||||
int FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors);
|
||||
|
||||
private:
|
||||
int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
|
||||
kernel::LiteKernel *after_kernel);
|
||||
|
||||
int UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
|
||||
kernel::LiteKernel *before_kernel);
|
||||
|
||||
int UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
|
||||
kernel::LiteKernel *after_kernel);
|
||||
|
||||
int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
|
||||
kernel::LiteKernel *next_kernel);
|
||||
|
||||
int InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
|
||||
std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);
|
||||
|
||||
int InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
|
||||
std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);
|
||||
|
||||
private:
|
||||
int total = 0;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
|
Loading…
Reference in new issue