!1214 add superkernel off attr for graph

From: @ni100die
Reviewed-by: @xchu42,@ji_chen
Signed-off-by:
pull/1214/MERGE
Committed by mindspore-ci-bot (via Gitee), 4 years ago
commit e119b3c951

@@ -434,7 +434,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
                        "Assign node %s continuous input memory failed.", node->GetName().c_str())
   }
   for (auto pair : memory_offset_) {
-    GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
+    GELOGD("After reassign continuous memory, memory type = %ld, mem_offset = %zu.", pair.first,
            pair.second.mem_offset_);
   }
   return ge::SUCCESS;
@@ -512,11 +512,11 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
       auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
       output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
       peer_op_desc->SetOutputOffset(output_list);
-      GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(),
+      GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld.", node->GetName().c_str(),
              out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(),
              output_list_this.at(out2ins.begin()->first), peer_output_offset);
     } else {
-      GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(),
+      GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(),
              out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
     }
     // first input is beginning offset
@@ -542,7 +542,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
   }
   GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
-         "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
+         "size[%zu] realsize[%ld] nopadding size[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
          peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
          output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
          is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
@@ -1549,7 +1549,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
     auto continuous_type = iter->second;
     bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
     if (continuous_input) {
-      GELOGI("node %s 's precursor node %s need assign continuous input memory, store node firstly.",
+      GELOGI("Node %s 's precursor node %s need assign continuous input memory, store node firstly.",
             input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
       return false;
     }
@@ -1559,7 +1559,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
     node_2_continuous_type.emplace(out_node, continuous_type);
     bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
     if (continuous_input) {
-      GELOGI("node %s 's succeed node %s need assign continuous input memory, store node firstly.",
+      GELOGI("Node %s 's succeed node %s need assign continuous input memory, store node firstly.",
             input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
       return false;
     }

@@ -366,8 +366,11 @@ void ModelBuilder::InitL1FusionOption() {
   string buffer_optimize = "off_optimize";
   graphStatus ret = ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize);
   if (ret == GRAPH_SUCCESS) {
-    is_l1_fusion_enable_ = (buffer_optimize == "l1_optimize");
-    GELOGD("The value of %s is %s.", BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str());
+    bool off_superkernel = false;
+    (void)AttrUtils::GetBool(compute_graph_, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel);
+    is_l1_fusion_enable_ = ((buffer_optimize == "l1_optimize") && (!off_superkernel));
+    GELOGI("Compute graph %s the value of %s is %s, superkernel flag %d.", compute_graph_->GetName().c_str(),
+           BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str(), is_l1_fusion_enable_);
   } else {
     GELOGW("The value of %s is empty.", kEnableL1Fusion.c_str());
  }
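
The new gate reads as: L1 fusion stays on only when the buffer-optimize option asks for it and the graph has not opted out. Since the return value of AttrUtils::GetBool is discarded, off_superkernel keeps its false default when the attribute is absent, preserving the old behavior:

    buffer_optimize      off_superkernel    is_l1_fusion_enable_
    "l1_optimize"        false / unset      true
    "l1_optimize"        true               false
    other value          any                false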
@@ -709,7 +712,7 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) {
   GE_TIMESTAMP_START(SetInputOutputOffset);
   SetInputOutputOffsetPass input_output_offset;
   GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), "Set input output offset failed.");
-  GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run.");
+  GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run");
   // Compile single op in graph build stage
   GE_TIMESTAMP_START(CompileSingleOp);

@@ -532,20 +532,20 @@ Status DavinciModel::DoTaskSink() {
   GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed.");
   if (known_node_) {
-    GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed.");
+    GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node's args failed");
   }
-  GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed.");
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");
-  GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");
-  GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed.");
-  GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
+  GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed");
+  GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed");
+  GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed");
+  GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed");
   GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_));

@@ -3090,6 +3090,15 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra
     sub_graph->SetSessionID(session_id);
     sub_graph->SetGraphID(graph_node->GetGraphId());
   }
+  bool off_superkernel = false;
+  if (AttrUtils::GetBool(compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
+    GELOGI("Compute graph %s get superkernel flag %d.", compute_graph->GetName().c_str(), off_superkernel);
+    if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
+      GELOGE(FAILED, "Compute graph %s set superkernel flag %d failed", merged_compute_graph->GetName().c_str(),
+             off_superkernel);
+      return FAILED;
+    }
+  }
   GE_TIMESTAMP_EVENT_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph");
   GE_DUMP(merged_compute_graph, "mergedComputeGraph");
   compute_graph = merged_compute_graph;
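
merged_compute_graph is a fresh graph produced by MergeSubGraph, so an attribute set on the original compute_graph would be silently dropped without this copy-forward; the build fails outright if the copy cannot be applied. The pattern generalizes to a small helper; a sketch (the helper name is invented, the AttrUtils calls are the ones used above):

    // Hypothetical helper: copy a bool attribute from src to dst when it is present on src.
    Status CopyBoolAttr(const ComputeGraphPtr &src, const ComputeGraphPtr &dst, const std::string &name) {
      bool value = false;
      if (AttrUtils::GetBool(src, name, value)) {  // absent attribute: nothing to copy
        if (!AttrUtils::SetBool(dst, name, value)) {
          GELOGE(FAILED, "Graph %s set attr %s failed.", dst->GetName().c_str(), name.c_str());
          return FAILED;
        }
      }
      return SUCCESS;
    }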

@@ -111,8 +111,9 @@ void DynamicStitchKernel::ComputeMergedShape(const vector<ConstGeTensorPtr> &inp
   int32_t merged_first_dim = 0;
   int64_t indices_shape_size = 0;
   for (int i = 0; i < n_; i++) {
-    indices_shape_size = input[i]->GetTensorDesc().GetShape().GetShapeSize();
-    indices_shape_size = indices_shape_size == 0 ? 1 : indices_shape_size;
+    // shape is [] means scalar
+    indices_shape_size =
+        input[i]->GetTensorDesc().GetShape().GetDims().empty() ? 1 : input[i]->GetTensorDesc().GetShape().GetShapeSize();
     const int32_t *input_indices = reinterpret_cast<const int32_t *>(input[i]->GetData().data());
     for (int64_t j = 0; j < indices_shape_size; j++) {
       merged_first_dim = std::max(merged_first_dim, input_indices[j]);
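
The old two-step normalization treated any GetShapeSize() of 0 as a scalar, but a shape such as [0] (a genuinely empty tensor) also reports size 0 and would wrongly be read as holding one index. The new check tells the two apart by inspecting the dims directly, assuming GetShapeSize() returns the element count (0 for an empty tensor):

    // shape []  : GetDims().empty() == true  -> scalar, one index to read
    // shape [0] : GetDims().empty() == false -> GetShapeSize() == 0, read nothing
    // shape [4] : GetDims().empty() == false -> GetShapeSize() == 4, read four indices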
