support buffer fusion

pull/566/head
jjfeing 5 years ago
parent aad5771a62
commit 5e7e34f54a

@ -513,36 +513,36 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp
return true;
}
void TbeKernelBuild::GenDescJson(const shared_ptr<mindspore::AnfNode> &anf_node, size_t out_idx,
nlohmann::json *output_desc) {
void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
size_t desc_output_idx, nlohmann::json *output_desc) {
std::string output_desc_name = anf_node->fullname_with_scope();
if (out_idx > 0) {
output_desc_name = output_desc_name + "_" + std::to_string(out_idx);
if (node_out_idx > 0) {
output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx);
}
(*output_desc)["name"] = NormalizeFullScopeName(output_desc_name);
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, out_idx);
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
(*output_desc)["data_type"] = tbe::TypeIdToString(type_id);
auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, out_idx);
auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx);
if (ori_shape.empty()) {
ori_shape.emplace_back(1);
}
(*output_desc)["ori_shape"] = ori_shape;
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, out_idx);
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx);
if (shape.empty()) {
shape.emplace_back(1);
}
(*output_desc)["shape"] = shape;
auto format = AnfAlgo::GetOutputFormat(anf_node, out_idx);
auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
if (format == kOpFormat_DEFAULT) {
if (ori_shape.size() == 4) {
format = kOpFormat_NCHW;
} else {
format = "ND";
format = kOpFormat_ND;
}
}
(*output_desc)["format"] = format;
(*output_desc)["ori_format"] = kOpFormat_NCHW;
(*output_desc)["output_index"] = out_idx;
(*output_desc)["output_index"] = desc_output_idx;
}
void TbeKernelBuild::GenReusedOutputDesc(const shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
@ -605,7 +605,7 @@ bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode>
MS_LOG(INFO) << "real name " << real_node->fullname_with_scope() << " index:" << real_idx;
// "output_desc"
nlohmann::json output_desc;
GenDescJson(real_node, real_idx, &output_desc);
GenDescJson(real_node, real_idx, real_idx, &output_desc);
output_desc_list.push_back(output_desc);
(*data_str)["name"] = NormalizeFullScopeName(real_node->fullname_with_scope());
}
@ -653,9 +653,9 @@ size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool i
return (op_info->inputs_ptr().size() + 1 - cnode->inputs().size());
}
bool TbeKernelBuild::GenFusionComputeInputeJson(const mindspore::CNodePtr &cnode,
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
std::vector<nlohmann::json> *input_desc_list, size_t *index) {
bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
std::vector<nlohmann::json> *input_desc_list, size_t *index) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(input_desc_list);
bool is_dynamic_input = IsDynamicInput(cnode);
@ -666,7 +666,7 @@ bool TbeKernelBuild::GenFusionComputeInputeJson(const mindspore::CNodePtr &cnode
size_t real_idx = kernel_idx.second;
MS_LOG(INFO) << "real name" << real_node->fullname_with_scope() << "index:" << real_idx;
nlohmann::json input_desc;
GenDescJson(real_node, real_idx, &input_desc);
GenDescJson(real_node, real_idx, real_idx, &input_desc);
if (is_dynamic_input) {
MS_LOG(INFO) << "node has dynamic input.";
input_desc["dyn_index"] = (i - 1);
@ -687,6 +687,66 @@ bool TbeKernelBuild::GenFusionComputeInputeJson(const mindspore::CNodePtr &cnode
return true;
}
std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &output_used_nums) {
std::vector<size_t> desc_output_index = {};
bool find_reused = false;
size_t reused_num = 0;
for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
auto output_use_num_item = output_used_nums[idx];
MS_LOG(INFO) << "output used num[" << idx << "] = " << output_use_num_item;
if (output_use_num_item == 1 || output_use_num_item == 0) {
desc_output_index.emplace_back(idx);
} else {
if (!find_reused) {
desc_output_index.emplace_back(idx);
} else {
desc_output_index.emplace_back(output_used_nums[idx - 1]);
}
reused_num += (output_use_num_item - 1);
find_reused = true;
}
}
auto pad_value = output_used_nums.size() == 1 ? 0 : desc_output_index[desc_output_index.size() - 1] + 1;
for (size_t i = 0; i < reused_num; ++i) {
desc_output_index.emplace_back(pad_value);
}
return desc_output_index;
}
bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
std::vector<nlohmann::json> *output_desc_list) {
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope();
if (output_used_nums.size() != output_size) {
MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")"
<< " is not match output used num(" << output_used_nums.size() << ")";
return false;
}
auto desc_output_index = GetDescOutputIndex(output_used_nums);
for (size_t i = 0; i < output_size; ++i) {
MS_LOG(INFO) << "Fusion index: " << i << ", desc_output_index: " << desc_output_index[i];
nlohmann::json output_desc;
GenDescJson(cnode, i, desc_output_index[i], &output_desc);
output_desc_list->emplace_back(output_desc);
}
for (size_t j = output_size; j < desc_output_index.size(); ++j) {
MS_LOG(INFO) << "Fusion index: " << j << ", desc_output_index: " << desc_output_index[j];
nlohmann::json output_desc;
GenReusedOutputDesc(cnode, j, desc_output_index[j], &output_desc);
output_desc_list->emplace_back(output_desc);
}
} else {
for (size_t i = 0; i < output_size; ++i) {
nlohmann::json output_desc;
GenDescJson(cnode, i, i, &output_desc);
output_desc_list->push_back(output_desc);
}
}
return true;
}
bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node,
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
nlohmann::json *compute_op_str, std::string *fusion_kernel_name,
@ -696,28 +756,14 @@ bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_n
MS_EXCEPTION_IF_NULL(cnode);
// gen input desc
std::vector<nlohmann::json> input_desc_list;
(void)GenFusionComputeInputeJson(cnode, layer_iter, &input_desc_list, index);
(void)GenFusionComputeInputJson(cnode, layer_iter, &input_desc_list, index);
(*compute_op_str)["input_desc"] = input_desc_list;
// gen output desc
std::vector<nlohmann::json> output_desc_list;
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
for (size_t i = 0; i < output_size; ++i) {
nlohmann::json output_desc;
GenDescJson(cnode, i, &output_desc);
output_desc_list.push_back(output_desc);
}
if (AnfAlgo::GetCNodeName(cnode) == prim::kPrimConv2D->name()) {
if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, compute_node)) {
auto output_used_num = AnfAlgo::GetNodeAttr<size_t>(compute_node, kAttrOutputUsedNum);
for (size_t i = output_size; i < output_used_num; ++i) {
nlohmann::json output_desc;
GenReusedOutputDesc(cnode, i, 0, &output_desc);
output_desc_list.push_back(output_desc);
}
}
if (!GenFusionComputeOutputJson(cnode, &output_desc_list)) {
MS_LOG(INFO) << "Fusion Error: gen fusion output desc faild, node full name: " << cnode->fullname_with_scope();
return false;
}
(*compute_op_str)["output_desc"] = output_desc_list;
// gen others
auto type = AnfAlgo::GetCNodeName(cnode);

@ -53,11 +53,14 @@ class TbeKernelBuild {
static bool GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node,
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
nlohmann::json *compute_op_str, std::string *fusion_kernel_name, size_t *index);
static bool GenFusionComputeInputeJson(const mindspore::CNodePtr &cnode,
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
std::vector<nlohmann::json> *input_desc_list, size_t *index);
static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t out_idx,
nlohmann::json *output_desc);
static bool GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
std::vector<nlohmann::json> *input_desc_list, size_t *index);
static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums);
static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
std::vector<nlohmann::json> *output_desc_list);
static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
size_t desc_output_idx, nlohmann::json *output_desc);
static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
size_t output_index, nlohmann::json *output_desc);
static size_t GetIOSizeImpl(const nlohmann::json &desc);

Loading…
Cancel
Save