|
|
|
@ -513,36 +513,36 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Fills *output_desc with the JSON descriptor of one output of anf_node. The keys written are
// "name", "data_type", "ori_shape", "shape", "format", "ori_format" and "output_index".
//
// NOTE(review): this span shows two revisions of the function interleaved line by line: a
// 3-parameter form using out_idx and a 4-parameter form using node_out_idx / desc_output_idx.
// Each statement that reads the index therefore appears twice. Presumably only one revision is
// meant to be live (the 4-argument call sites suggest the 4-parameter one) -- confirm before
// editing any code here.
//
// anf_node         node whose output is described; dereferenced without a null check.
// out_idx /
// node_out_idx     index of the output on anf_node; used for every AnfAlgo lookup and the name suffix.
// desc_output_idx  value stored under "output_index" (4-parameter revision only).
// output_desc      destination JSON object; dereferenced without a null check.
void TbeKernelBuild::GenDescJson(const shared_ptr<mindspore::AnfNode> &anf_node, size_t out_idx,
|
|
|
|
|
nlohmann::json *output_desc) {
|
|
|
|
|
void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
|
|
|
|
|
size_t desc_output_idx, nlohmann::json *output_desc) {
|
|
|
|
|
// The descriptor name starts from the node's full scoped name.
std::string output_desc_name = anf_node->fullname_with_scope();
|
|
|
|
|
// A non-zero output index is appended to the name as "_<index>"; output 0 keeps the bare name.
if (out_idx > 0) {
|
|
|
|
|
output_desc_name = output_desc_name + "_" + std::to_string(out_idx);
|
|
|
|
|
if (node_out_idx > 0) {
|
|
|
|
|
output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx);
|
|
|
|
|
}
|
|
|
|
|
(*output_desc)["name"] = NormalizeFullScopeName(output_desc_name);
|
|
|
|
|
// "data_type" is the device data type of the output, converted to its string form.
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, out_idx);
|
|
|
|
|
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
|
|
|
|
|
(*output_desc)["data_type"] = tbe::TypeIdToString(type_id);
|
|
|
|
|
// "ori_shape" is the inferred shape; an empty shape is replaced by [1]
// (presumably a scalar output -- confirm).
auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, out_idx);
|
|
|
|
|
auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx);
|
|
|
|
|
if (ori_shape.empty()) {
|
|
|
|
|
ori_shape.emplace_back(1);
|
|
|
|
|
}
|
|
|
|
|
(*output_desc)["ori_shape"] = ori_shape;
|
|
|
|
|
// "shape" is the device shape, with the same empty -> [1] substitution.
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, out_idx);
|
|
|
|
|
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx);
|
|
|
|
|
if (shape.empty()) {
|
|
|
|
|
shape.emplace_back(1);
|
|
|
|
|
}
|
|
|
|
|
(*output_desc)["shape"] = shape;
|
|
|
|
|
// The default format is resolved here: NCHW when ori_shape has exactly 4 dimensions, ND otherwise.
auto format = AnfAlgo::GetOutputFormat(anf_node, out_idx);
|
|
|
|
|
auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
|
|
|
|
|
if (format == kOpFormat_DEFAULT) {
|
|
|
|
|
if (ori_shape.size() == 4) {
|
|
|
|
|
format = kOpFormat_NCHW;
|
|
|
|
|
} else {
|
|
|
|
|
// The two assignments below are the same statement in its two revisions
// (string literal vs. named constant).
format = "ND";
|
|
|
|
|
format = kOpFormat_ND;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
(*output_desc)["format"] = format;
|
|
|
|
|
// "ori_format" is always written as NCHW, independent of the format resolved above.
(*output_desc)["ori_format"] = kOpFormat_NCHW;
|
|
|
|
|
// "output_index": the 3-parameter revision stores the node output index itself; the
// 4-parameter revision stores the separately supplied desc_output_idx.
(*output_desc)["output_index"] = out_idx;
|
|
|
|
|
(*output_desc)["output_index"] = desc_output_idx;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void TbeKernelBuild::GenReusedOutputDesc(const shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
|
|
|
|
@ -605,7 +605,7 @@ bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode>
|
|
|
|
|
MS_LOG(INFO) << "real name " << real_node->fullname_with_scope() << " index:" << real_idx;
|
|
|
|
|
// "output_desc"
|
|
|
|
|
nlohmann::json output_desc;
|
|
|
|
|
GenDescJson(real_node, real_idx, &output_desc);
|
|
|
|
|
GenDescJson(real_node, real_idx, real_idx, &output_desc);
|
|
|
|
|
output_desc_list.push_back(output_desc);
|
|
|
|
|
(*data_str)["name"] = NormalizeFullScopeName(real_node->fullname_with_scope());
|
|
|
|
|
}
|
|
|
|
@ -653,7 +653,7 @@ size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool i
|
|
|
|
|
return (op_info->inputs_ptr().size() + 1 - cnode->inputs().size());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool TbeKernelBuild::GenFusionComputeInputeJson(const mindspore::CNodePtr &cnode,
|
|
|
|
|
bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
|
|
|
|
|
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
|
|
|
|
|
std::vector<nlohmann::json> *input_desc_list, size_t *index) {
|
|
|
|
|
MS_EXCEPTION_IF_NULL(cnode);
|
|
|
|
@ -666,7 +666,7 @@ bool TbeKernelBuild::GenFusionComputeInputeJson(const mindspore::CNodePtr &cnode
|
|
|
|
|
size_t real_idx = kernel_idx.second;
|
|
|
|
|
MS_LOG(INFO) << "real name" << real_node->fullname_with_scope() << "index:" << real_idx;
|
|
|
|
|
nlohmann::json input_desc;
|
|
|
|
|
GenDescJson(real_node, real_idx, &input_desc);
|
|
|
|
|
GenDescJson(real_node, real_idx, real_idx, &input_desc);
|
|
|
|
|
if (is_dynamic_input) {
|
|
|
|
|
MS_LOG(INFO) << "node has dynamic input.";
|
|
|
|
|
input_desc["dyn_index"] = (i - 1);
|
|
|
|
@ -687,6 +687,66 @@ bool TbeKernelBuild::GenFusionComputeInputeJson(const mindspore::CNodePtr &cnode
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &output_used_nums) {
|
|
|
|
|
std::vector<size_t> desc_output_index = {};
|
|
|
|
|
bool find_reused = false;
|
|
|
|
|
size_t reused_num = 0;
|
|
|
|
|
for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
|
|
|
|
|
auto output_use_num_item = output_used_nums[idx];
|
|
|
|
|
MS_LOG(INFO) << "output used num[" << idx << "] = " << output_use_num_item;
|
|
|
|
|
if (output_use_num_item == 1 || output_use_num_item == 0) {
|
|
|
|
|
desc_output_index.emplace_back(idx);
|
|
|
|
|
} else {
|
|
|
|
|
if (!find_reused) {
|
|
|
|
|
desc_output_index.emplace_back(idx);
|
|
|
|
|
} else {
|
|
|
|
|
desc_output_index.emplace_back(output_used_nums[idx - 1]);
|
|
|
|
|
}
|
|
|
|
|
reused_num += (output_use_num_item - 1);
|
|
|
|
|
find_reused = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
auto pad_value = output_used_nums.size() == 1 ? 0 : desc_output_index[desc_output_index.size() - 1] + 1;
|
|
|
|
|
for (size_t i = 0; i < reused_num; ++i) {
|
|
|
|
|
desc_output_index.emplace_back(pad_value);
|
|
|
|
|
}
|
|
|
|
|
return desc_output_index;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Appends one JSON output descriptor per output of cnode to *output_desc_list.
//
// Without the kAttrOutputUsedNum attribute, every output i is described with output_index i.
// With the attribute, the per-output use counts are turned into descriptor indices via
// GetDescOutputIndex: the first output-count entries describe the real outputs, and any
// remaining entries are emitted through GenReusedOutputDesc.
//
// Returns false (after logging) when the attribute's length differs from the node's output
// count; returns true otherwise. Descriptors already appended are not rolled back.
bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
                                                std::vector<nlohmann::json> *output_desc_list) {
  const auto output_num = AnfAlgo::GetOutputTensorNum(cnode);

  // Plain case: no reuse information attached to the node.
  if (!AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
    for (size_t out = 0; out < output_num; ++out) {
      nlohmann::json desc;
      GenDescJson(cnode, out, out, &desc);
      output_desc_list->push_back(desc);
    }
    return true;
  }

  const auto used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
  MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope();
  if (used_nums.size() != output_num) {
    MS_LOG(INFO) << "Fusion error: output tenor num(" << output_num << ")"
                 << " is not match output used num(" << used_nums.size() << ")";
    return false;
  }

  const auto desc_indices = GetDescOutputIndex(used_nums);

  // Real outputs of the node.
  for (size_t out = 0; out < output_num; ++out) {
    MS_LOG(INFO) << "Fusion index: " << out << ", desc_output_index: " << desc_indices[out];
    nlohmann::json desc;
    GenDescJson(cnode, out, desc_indices[out], &desc);
    output_desc_list->emplace_back(desc);
  }

  // Extra descriptors for the additional uses of reused outputs.
  for (size_t extra = output_num; extra < desc_indices.size(); ++extra) {
    MS_LOG(INFO) << "Fusion index: " << extra << ", desc_output_index: " << desc_indices[extra];
    nlohmann::json desc;
    GenReusedOutputDesc(cnode, extra, desc_indices[extra], &desc);
    output_desc_list->emplace_back(desc);
  }
  return true;
}
|
|
|
|
|
|
|
|
|
|
bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node,
|
|
|
|
|
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
|
|
|
|
|
nlohmann::json *compute_op_str, std::string *fusion_kernel_name,
|
|
|
|
@ -696,28 +756,14 @@ bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_n
|
|
|
|
|
MS_EXCEPTION_IF_NULL(cnode);
|
|
|
|
|
// gen input desc
|
|
|
|
|
std::vector<nlohmann::json> input_desc_list;
|
|
|
|
|
(void)GenFusionComputeInputeJson(cnode, layer_iter, &input_desc_list, index);
|
|
|
|
|
(void)GenFusionComputeInputJson(cnode, layer_iter, &input_desc_list, index);
|
|
|
|
|
(*compute_op_str)["input_desc"] = input_desc_list;
|
|
|
|
|
// gen output desc
|
|
|
|
|
std::vector<nlohmann::json> output_desc_list;
|
|
|
|
|
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
|
|
|
|
|
for (size_t i = 0; i < output_size; ++i) {
|
|
|
|
|
nlohmann::json output_desc;
|
|
|
|
|
GenDescJson(cnode, i, &output_desc);
|
|
|
|
|
output_desc_list.push_back(output_desc);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (AnfAlgo::GetCNodeName(cnode) == prim::kPrimConv2D->name()) {
|
|
|
|
|
if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, compute_node)) {
|
|
|
|
|
auto output_used_num = AnfAlgo::GetNodeAttr<size_t>(compute_node, kAttrOutputUsedNum);
|
|
|
|
|
for (size_t i = output_size; i < output_used_num; ++i) {
|
|
|
|
|
nlohmann::json output_desc;
|
|
|
|
|
GenReusedOutputDesc(cnode, i, 0, &output_desc);
|
|
|
|
|
output_desc_list.push_back(output_desc);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!GenFusionComputeOutputJson(cnode, &output_desc_list)) {
|
|
|
|
|
MS_LOG(INFO) << "Fusion Error: gen fusion output desc faild, node full name: " << cnode->fullname_with_scope();
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
(*compute_op_str)["output_desc"] = output_desc_list;
|
|
|
|
|
// gen others
|
|
|
|
|
auto type = AnfAlgo::GetCNodeName(cnode);
|
|
|
|
|