diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc index fccdb57b..01c7de95 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc @@ -60,7 +60,7 @@ bool CheckShape(Format format, const ShapeVector &shape) { default: std::string error = "Trans format between " + FmtToStr(TypeUtils::FormatToSerialString(format)) + " and FORMAT_FRACTAL_NZ is not supported."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); return false; } } diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc index c36bffb5..36bea872 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc @@ -59,7 +59,7 @@ bool CheckShape(Format format, const ShapeVector &shape) { default: std::string error = "Trans format between " + FmtToStr(TypeUtils::FormatToSerialString(format)) + " and FORMAT_FRACTAL_ZZ is not supported."; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_FORMAT_INVALID, error.c_str()); return false; } } diff --git a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc index b09fd168..6817713a 100755 --- a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc @@ -92,7 +92,8 @@ Status CheckArgsForNhwcToNc1hwc0(const TransArgs &args) { Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) { std::shared_ptr dst(new (std::nothrow) uint8_t[total_size], std::default_delete()); if 
(dst == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, + "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s", TypeUtils::FormatToSerialString(args.src_format).c_str(), TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str()); return ACL_ERROR_GE_MEMORY_ALLOCATION; diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.cc b/ge/common/formats/format_transfers/format_transfer_transpose.cc index 694777f3..49bb5cd6 100755 --- a/ge/common/formats/format_transfers/format_transfer_transpose.cc +++ b/ge/common/formats/format_transfers/format_transfer_transpose.cc @@ -50,21 +50,21 @@ std::map>> perm_args{ bool IsShapeArgValid(const std::vector &src_shape, const std::vector &perm_arg) { if (src_shape.empty()) { std::string error = "Failed to transpose, empty src shape"; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); - GELOGE(PARAM_INVALID, "Failed to transpose, empty src shape"); + GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_SHAPE_INVALID, error.c_str()); + GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to transpose, empty src shape"); return false; } for (auto dim : src_shape) { if (dim < 0) { std::string error = "Failed to transpose, negative dim in src shape " + FmtToStr(ShapeToString(src_shape)); - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_SHAPE_INVALID, error.c_str()); return false; } } if (perm_arg.size() != src_shape.size()) { std::string error = "Failed to transpose, the size of src shape" + FmtToStr(src_shape.size()) + " and perm arg" + FmtToStr(perm_arg.size()) + " are different"; - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_SHAPE_INVALID, error.c_str()); return false; } @@ -73,7 +73,7 @@ bool IsShapeArgValid(const 
std::vector &src_shape, const std::vector(perm) >= perm_arg.size() || ++exists[perm] > 1) { std::string error = "Failed to transpose, duplicated perm arg " + FmtToStr(perm) + ", perm arg " + FmtToStr(JoinToString(perm_arg)); - GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_PARAM_INVALID, error.c_str()); return false; } } @@ -82,11 +82,11 @@ bool IsShapeArgValid(const std::vector &src_shape, const std::vector &src_shape, DataType src_data_type, const std::vector &perm_arg) { if (src == nullptr) { - GELOGE(PARAM_INVALID, "Failed to transpose, the src is null"); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Failed to transpose, the src is null"); return false; } if (GetSizeByDataType(src_data_type) < 0) { - GELOGE(UNSUPPORTED, "Failed to transpose, the data type %s is not support", + GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Failed to transpose, the data type %s is not support", TypeUtils::DataTypeToSerialString(src_data_type).c_str()); return false; } diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 313e010a..14882683 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -36,6 +36,7 @@ #include "graph/utils/type_utils.h" #include "init/gelib.h" #include "model/ge_model.h" +#include "analyzer/analyzer.h" using std::map; using std::string; @@ -1007,13 +1008,13 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); if (ret != SUCCESS) { GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", graph_id); - VarManagerPool::Instance().RemoveVarManager(session_id); - return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; + ret = GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; } + RtContextUtil::GetInstance().DestroyRtContexts(session_id); + Analyzer::GetInstance()->DestroySessionJsonObject(session_id); 
VarManagerPool::Instance().RemoveVarManager(session_id); - - return SUCCESS; + return ret; } Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph) { diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 1a4b62e4..6fbb9826 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -1735,7 +1735,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector /// void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { (void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env_); - GELOGD("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open"); + GEEVENT("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open"); string op_no_reuse_mem_str; const char *op_no_reuse_mem = std::getenv(OP_NO_REUSE_MEM); GE_IF_BOOL_EXEC(op_no_reuse_mem != nullptr, op_no_reuse_mem_str = string(op_no_reuse_mem); @@ -2125,7 +2125,7 @@ void SetBlockOpMemOffset(MemoryBlock *block, int32_t child_block_level) { child_block_level++; for (MemoryBlock *child_block : block->ChildBlockList()) { - SetBlockOpMemOffset(child_block, child_block_level); + SetBlockOpMemOffset(child_block, child_block_level); } } diff --git a/ge/graph/build/memory/var_mem_assign_util.cc b/ge/graph/build/memory/var_mem_assign_util.cc index f8b290cf..a817cdc1 100755 --- a/ge/graph/build/memory/var_mem_assign_util.cc +++ b/ge/graph/build/memory/var_mem_assign_util.cc @@ -311,6 +311,7 @@ Status VarMemAssignUtil::SetOutTransNodeToAssign(const ge::NodePtr &node, const } Status VarMemAssignUtil::AssignMemory2HasRefAttrNode(ge::ComputeGraphPtr &compute_graph) { + GraphToNodeMap graph_to_node; for (const ge::NodePtr &n : compute_graph->GetAllNodes()) { string ref_var_src_var_name; auto op_desc = n->GetOpDesc(); @@ -318,7 +319,8 @@ Status VarMemAssignUtil::AssignMemory2HasRefAttrNode(ge::ComputeGraphPtr &comput for (uint32_t 
idx = 0; idx < op_desc->GetOutputsSize(); idx += 1) { const auto out_desc = op_desc->MutableOutputDesc(idx); if (ge::AttrUtils::GetStr(out_desc, REF_VAR_SRC_VAR_NAME, ref_var_src_var_name)) { - GE_CHK_STATUS_RET(AssignData2VarRef(n, ref_var_src_var_name, compute_graph->GetSessionID(), idx)); + GE_CHK_STATUS_RET( + AssignData2VarRef(n, ref_var_src_var_name, compute_graph->GetSessionID(), idx, graph_to_node)); } } } @@ -326,16 +328,37 @@ Status VarMemAssignUtil::AssignMemory2HasRefAttrNode(ge::ComputeGraphPtr &comput } Status VarMemAssignUtil::AssignData2VarRef(const ge::NodePtr &has_ref_attr_node, const string &src_var_name, - uint64_t session_id, uint32_t out_index) { + uint64_t session_id, uint32_t out_index, + GraphToNodeMap &graph_to_node) { // Get ref_var_src_var address auto root_graph = GraphUtils::FindRootGraph(has_ref_attr_node->GetOwnerComputeGraph()); GE_CHECK_NOTNULL(root_graph); - ge::NodePtr var_ref_src_var = root_graph->FindNode(src_var_name); - if (var_ref_src_var == nullptr) { + // Cache mapping (name to nodeptr) improves query performance + auto &name_to_node = graph_to_node[root_graph]; + if (name_to_node.empty()) { + for (const ge::NodePtr &n : root_graph->GetDirectNode()) { + name_to_node.emplace(n->GetName(), n); + } + for (auto sub_graph : root_graph->GetAllSubgraphs()) { + auto &name_to_node_sub = graph_to_node[sub_graph]; + if (name_to_node_sub.empty()) { + for (const ge::NodePtr &n : sub_graph->GetDirectNode()) { + name_to_node_sub.emplace(n->GetName(), n); + } + } + } + } + + ge::NodePtr var_ref_src_var = nullptr; + auto it = name_to_node.find(src_var_name); + if ((it != name_to_node.end()) && (it->second != nullptr)) { + var_ref_src_var = it->second; + } else { for (auto sub_graph : root_graph->GetAllSubgraphs()) { - auto node_ptr = sub_graph->FindNode(src_var_name); - if (node_ptr != nullptr) { - var_ref_src_var = node_ptr; + auto &name_to_node_sub = graph_to_node[sub_graph]; + it = name_to_node_sub.find(src_var_name); + if ((it != 
name_to_node_sub.end()) && (it->second != nullptr)) { + var_ref_src_var = it->second; break; } } diff --git a/ge/graph/build/memory/var_mem_assign_util.h b/ge/graph/build/memory/var_mem_assign_util.h index f0e6270d..9528dbdb 100644 --- a/ge/graph/build/memory/var_mem_assign_util.h +++ b/ge/graph/build/memory/var_mem_assign_util.h @@ -22,6 +22,8 @@ #include "graph/utils/node_utils.h" namespace ge { +using GraphToNodeMap = std::map>; + class VarMemAssignUtil { public: static Status AssignVarMemory(ge::ComputeGraphPtr &compute_graph); @@ -47,7 +49,7 @@ class VarMemAssignUtil { static Status DealTransNode(const ge::NodePtr &final_trans_node); static Status DealExportTransNode(const ge::NodePtr &node, const ge::NodePtr &final_trans_node); static Status AssignData2VarRef(const ge::NodePtr &variable_ref, const std::string &src_var_name, uint64_t session_id, - uint32_t out_index); + uint32_t out_index, GraphToNodeMap &graph_to_node); static Status SetOutTransNodeToAssign(const ge::NodePtr &node, const ge::NodePtr &final_trans_node, size_t index); }; diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 52642086..645d1f35 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -2137,7 +2137,6 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data Status DavinciModel::SyncVarData() { GELOGI("Sync var data, model id:%u", model_id_); - Status ret = SUCCESS; if (global_step_addr_ != nullptr && global_step_size_ != 0) { const vector v_step = { iterator_count_ }; @@ -2145,7 +2144,7 @@ Status DavinciModel::SyncVarData() { RT_MEMCPY_HOST_TO_DEVICE)); } - return ret; + return SUCCESS; } Status DavinciModel::InitModelProfile() { @@ -3262,11 +3261,9 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp /// Status DavinciModel::UpdateIoTaskArgs(const std::map &data_info, bool is_input, const vector &blobs, bool 
is_dynamic, const string &batch_label) { - string input_or_output; - is_input ? input_or_output = "input" : input_or_output = "output"; if (blobs.size() != data_info.size()) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: model requires %zu, but user actually feeds %zu", - input_or_output.c_str(), data_info.size(), blobs.size()); + is_input ? "input" : "output", data_info.size(), blobs.size()); return ACL_ERROR_GE_PARAM_INVALID; } @@ -3274,7 +3271,7 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & if (data.first >= blobs.size()) { // check data index. GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: can not find No.%u data, because user only feeds %zu", - input_or_output.c_str(), data.first, blobs.size()); + is_input ? "input" : "output", data.first, blobs.size()); return ACL_ERROR_GE_PARAM_INVALID; } @@ -3306,21 +3303,20 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & } for (size_t count = 0; count < data.second.GetDataCount(); ++count) { - int64_t size = data.second.GetDataInfo().at(count).first; void *addr = data.second.GetDataInfo().at(count).second; void *buffer_addr = reinterpret_cast(reinterpret_cast(buffer.data) + data.second.GetRelativeOffset().at(count)); GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p, batch_label: %s", - input_or_output.c_str(), data.first, addr, size, buffer_addr, batch_label.c_str()); + is_input ? "input" : "output", data.first, addr, data.second.GetDataInfo().at(count).first, + buffer_addr, batch_label.c_str()); // For input data, just copy for rts task. 
- for (ZeroCopyTask &task : zero_copy_tasks_) { - if (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label) { + for (auto &task : zero_copy_tasks_) { + bool not_same_batch = (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label); + if (not_same_batch) { continue; } uintptr_t addr_val = reinterpret_cast(addr); - if (task.UpdateTaskParam(addr_val, buffer_addr) != SUCCESS) { - return ACL_ERROR_GE_PARAM_INVALID; - } + (void)task.UpdateTaskParam(addr_val, buffer_addr); } } } @@ -3980,7 +3976,7 @@ Status DavinciModel::InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc) Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const { const auto it = orig_input_info_.find(index); if (it == orig_input_info_.end()) { - GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "there is not AIPP related with index %u.", index); + GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "There is not AIPP related with index %u.", index); return ACL_ERROR_GE_AIPP_NOT_EXIST; } @@ -4014,7 +4010,7 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_ Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc) { if (!op_desc->HasAttr(ATTR_NAME_AIPP_INPUTS) || !op_desc->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { - GELOGI("there is not AIPP related with index %u.", index); + GELOGI("There is not AIPP related with index %u.", index); return SUCCESS; } @@ -4031,7 +4027,7 @@ Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op ConstGeTensorDescPtr data_input_desc = op_desc->GetInputDescPtr(kDataIndex); int64_t data_input_size; (void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size); - GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s.", + GELOGD("Related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s.", index, 
op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), @@ -4058,7 +4054,7 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, vector &output_dims) const { const auto it = aipp_dims_info_.find(index); if (it == aipp_dims_info_.end()) { - GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "there is not AIPP related with index %u.", index); + GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "There is not AIPP related with index %u.", index); return ACL_ERROR_GE_AIPP_NOT_EXIST; } diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index 5822056d..cc8bd90d 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -356,6 +356,14 @@ void CachingAllocator::FreeBlocks() { (void) FreeCachedBlocks(); } +void CachingAllocator::TryFreeBlocks() { + GELOGI("Try free blocks."); + std::lock_guard lock(mutex_); + if (allocated_blocks_.empty()) { + (void) FreeCachedBlocks(); + } +} + void CachingAllocator::FreeBlockBins() { GELOGI("Free block bins."); std::lock_guard lock(mutex_); diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h index 27563c2d..a9c3202a 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -94,6 +94,13 @@ class CachingAllocator { /// Status Free(uint8_t *memory_addr, uint32_t device_id = 0); + /// + /// @ingroup ge_graph + /// @brief try to free memory when no memory is referenced + /// @return void + /// + void TryFreeBlocks(); + private: /// diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index 25dabd78..34224fe5 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -136,12 +136,12 @@ Status 
HybridModelBuilder::Build() { GE_CHK_STATUS_RET(RecoverGraphUnknownFlag(), "[%s] Failed to RecoverGraphUnknownFlag", GetGraphName()); GE_CHK_STATUS_RET(IndexSpecialNodes(), "[%s] Failed to index nodes", GetGraphName()); GE_CHK_STATUS_RET(IndexTaskDefs(), "[%s] Failed to index task defs", GetGraphName()); + GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName()); GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName()); GE_CHK_STATUS_RET(AssignUninitializedConstantOps(), "[%s] Failed to assign uninitialized constants", GetGraphName()); GE_CHK_STATUS_RET(TransAllVarData(), "[%s] Failed to trans all var data", GetGraphName()); GE_CHK_STATUS_RET(CopyVarData(), "[%s] Failed to copy var data", GetGraphName()); GE_CHK_STATUS_RET(InitModelMem(), "[%s] Failed to init memory", GetGraphName()); - GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName()); GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName()); GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName()); GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName()); @@ -599,9 +599,9 @@ Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) { return SUCCESS; } -Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGraphPtr &merged_graph) { +Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeGraphPtr &merged_graph) { merged_graph = MakeShared("MergedGraph"); - for (const auto &node : root_graph.GetDirectNode()) { + for (const auto &node : root_graph->GetDirectNode()) { GE_CHECK_NOTNULL(node); auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -631,7 +631,7 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGrap } } } - GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, *merged_graph, *subgraph), + GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, merged_graph, *subgraph), 
"[%s] Failed to merge subgraph.", subgraph->GetName().c_str()); } @@ -647,18 +647,19 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGrap return a_level < b_level; }); - for (auto &remained_subgraph : root_graph.GetAllSubgraphs()) { + for (auto &remained_subgraph : root_graph->GetAllSubgraphs()) { GELOGD("Adding subgraph [%s] to merged-graph.", remained_subgraph->GetName().c_str()); GE_CHK_GRAPH_STATUS_RET(merged_graph->AddSubgraph(remained_subgraph), "Failed to add subgraph [%s]", remained_subgraph->GetName().c_str()); + remained_subgraph->SetParentGraph(merged_graph); } return SUCCESS; } -Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, - ComputeGraph &parent_graph, +Status HybridModelBuilder::UnfoldSubgraph(ComputeGraphPtr &root_graph, + ComputeGraphPtr &parent_graph, ComputeGraph &sub_graph) { auto parent_node = sub_graph.GetParentNode(); GE_CHECK_NOTNULL(parent_node); @@ -687,15 +688,23 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, } } - parent_graph.AddNode(sub_node); + if (!sub_node->GetOpDesc()->GetSubgraphInstanceNames().empty()) { + for (size_t i = 0; i < sub_node->GetOpDesc()->GetSubgraphInstanceNames().size(); ++i) { + auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, i); + GE_CHECK_NOTNULL(sub_sub_graph); + sub_sub_graph->SetParentGraph(parent_graph); + } + } + parent_graph->AddNode(sub_node); GELOGD("[%s::%s] added to parent graph: [%s].", sub_graph.GetName().c_str(), sub_node->GetName().c_str(), - parent_graph.GetName().c_str()); + parent_graph->GetName().c_str()); + sub_node->SetOwnerComputeGraph(parent_graph); } GELOGD("[%s] Done merging subgraph. 
remove it from root graph.", sub_graph.GetName().c_str()); - root_graph.RemoveSubgraph(sub_graph.GetName()); + root_graph->RemoveSubgraph(sub_graph.GetName()); return SUCCESS; } @@ -747,7 +756,7 @@ Status HybridModelBuilder::LoadGraph() { GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", root_graph->GetDirectNodesSize(), root_graph->GetAllNodesSize()); - GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(*root_graph, merged_graph), "Failed to unfold subgraphs."); + GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(root_graph, merged_graph), "Failed to unfold subgraphs."); root_graph = std::move(merged_graph); GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", root_graph->GetDirectNodesSize(), @@ -1030,8 +1039,8 @@ Status HybridModelBuilder::InitWeights() { GELOGI("Init weight mem successfully, weight base %p, weight size = %zu", weight_base, sub_weight_buffer->GetSize()); - auto root_graph = GraphUtils::GetComputeGraph(subgraph_model.second->GetGraph()); - hybrid_model_.weight_buffer_map_.emplace(root_graph->GetName(),std::move(sub_weight_buffer)); + auto root_graph = ge_root_model_->GetRootGraph()->GetSubgraph(subgraph_model.first); + hybrid_model_.weight_buffer_map_.emplace(root_graph->GetName(), std::move(sub_weight_buffer)); for (auto &node : root_graph->GetDirectNode()) { if (node->GetType() != CONSTANT) { continue; diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index a59a282a..30241003 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -47,8 +47,8 @@ class HybridModelBuilder { static Status HandleDtString(const GeTensor &tensor, void *var_addr); static Status MergeInputNodes(ComputeGraph &compute_graph); static Status MergeNetOutputNode(ComputeGraph &compute_graph); - static Status UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGraphPtr &merged_graph); - static Status UnfoldSubgraph(ComputeGraph &root_graph, ComputeGraph 
&parent_graph, ComputeGraph &sub_graph); + static Status UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeGraphPtr &merged_graph); + static Status UnfoldSubgraph(ComputeGraphPtr &root_graph, ComputeGraphPtr &parent_graph, ComputeGraph &sub_graph); static Status BuildInputMapping(GraphItem &graph_item, std::vector &data_nodes, bool is_root_graph); diff --git a/ge/single_op/single_op_manager.cc b/ge/single_op/single_op_manager.cc index 6569764c..6246d6a1 100644 --- a/ge/single_op/single_op_manager.cc +++ b/ge/single_op/single_op_manager.cc @@ -19,6 +19,9 @@ #include #include +#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_caching_allocator.h" + namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManager() { for (auto &it : stream_resources_) { @@ -69,6 +72,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::Release delete it->second; it->second = nullptr; (void)stream_resources_.erase(it); + MemManager::Instance().CachingInstance(RT_MEMORY_HBM).TryFreeBlocks(); return SUCCESS; } diff --git a/metadef b/metadef index ccfccb4b..7e90824d 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit ccfccb4bb355425cc09594b8ea267fb8ca938138 +Subproject commit 7e90824d05f349c77b85c5d547b80f9f7e197e35 diff --git a/parser b/parser index 0d4703aa..0b1cd5d9 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 0d4703aa893e90f23ba8a2dbd8903e028680213f +Subproject commit 0b1cd5d98d1f80c119c4aa251216d837f9f7c359 diff --git a/tests/ut/ge/common/format_transfer_transpose_unittest.cc b/tests/ut/ge/common/format_transfer_transpose_unittest.cc index 04f2a557..b710acde 100644 --- a/tests/ut/ge/common/format_transfer_transpose_unittest.cc +++ b/tests/ut/ge/common/format_transfer_transpose_unittest.cc @@ -4676,5 +4676,24 @@ TEST_F(UtestFormatTranspose, invalid_dst_format) { EXPECT_EQ(transfer.TransShape(FORMAT_NCHW, src_shape, DT_FLOAT16, FORMAT_C1HWNC0, dst_shape), 
ACL_ERROR_GE_FORMAT_INVALID); } + +TEST_F(UtestFormatTranspose, invalid_src_data) { + uint8_t *data = nullptr; + TransArgs args{data, FORMAT_NCHW, FORMAT_NHWC, std::vector({1, 3, 8, 8}), std::vector({1, 8, 8, 3}), DT_INT64}; + FormatTransferTranspose transpose; + TransResult result; + EXPECT_EQ(transpose.TransFormat(args, result), ACL_ERROR_GE_PARAM_INVALID); + + uint16_t data1[3] = {14583, 12849, 14184}; + TransArgs args1{reinterpret_cast(data1), FORMAT_NCHW, FORMAT_NHWC, std::vector({-1, 3, 1, 1}), std::vector({1, 1, 1, 3}), DT_INT64}; + FormatTransferTranspose transpose1; + TransResult result1; + EXPECT_EQ(transpose1.TransFormat(args1, result1), ACL_ERROR_GE_SHAPE_INVALID); + + TransArgs args2{reinterpret_cast(data1), FORMAT_NCHW, FORMAT_NHWC, std::vector({3, 1, 1}), std::vector({1, 1, 1, 3}), DT_INT64}; + FormatTransferTranspose transpose2; + TransResult result2; + EXPECT_EQ(transpose2.TransFormat(args2, result2), ACL_ERROR_GE_SHAPE_INVALID); +} } // namespace formats } // namespace ge diff --git a/tests/ut/ge/generator/ge_generator_unittest.cc b/tests/ut/ge/generator/ge_generator_unittest.cc index 3107248d..fef90ee5 100644 --- a/tests/ut/ge/generator/ge_generator_unittest.cc +++ b/tests/ut/ge/generator/ge_generator_unittest.cc @@ -155,4 +155,17 @@ TEST_F(UtestGeGenerator, test_remove_const) { vector outputs; generator.RemoveConst(inputs, outputs); } + +TEST_F(UtestGeGenerator, test_generate_online_model) { + GeTensorDesc tensor_desc; + GeTensor tensor(tensor_desc); + const vector inputs = { tensor, tensor }; + auto compute_graph = MakeGraph(); + compute_graph->TopologicalSorting(); + Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); + GeGenerator generator; + generator.Initialize({}); + std::string name; + EXPECT_NE(generator.GenerateOfflineModel(graph, name, inputs), SUCCESS); +} } // namespace ge diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc index 5cd16399..ba5cdcd4 
100644 --- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc +++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc @@ -33,6 +33,7 @@ #include "graph/build/memory/graph_mem_assigner.h" #include "graph/build/memory/hybrid_mem_assigner.h" #include "graph/build/memory/max_block_mem_assigner.h" +#include "graph/manager/graph_var_manager.h" #undef protected #undef private @@ -77,8 +78,8 @@ class UtestMemoryAssignerTest : public testing::Test { op_def->SetWorkspaceBytes(workspace_bytes); return op_def; } - void MakeGraph(ge::ComputeGraphPtr &graph) { - ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); + void MakeGraph(ge::ComputeGraphPtr &graph, const string &type = "some") { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000, type); op_def_a->SetStreamId(0); ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000); op_def_b->SetStreamId(0); @@ -263,3 +264,38 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_set_last_used_attr) { (void) ge::AttrUtils::GetInt(node_f->GetOpDesc()->GetInputDesc(0), ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, flag); EXPECT_EQ(flag, 1); } + +TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeGraph(graph, VARIABLE); + auto node_a = graph->FindNode("A"); + auto node_b = graph->FindNode("B"); + std::string value = "A"; + (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); + MemoryAssigner memory_assigner(graph); + map mem_offset; + size_t zero_memory_size = 0; + VarManager::Instance(0)->Init(0, 0, 0, 0); + EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); + + EXPECT_EQ(node_b->GetOpDesc()->GetOutputOffset()[0], node_a->GetOpDesc()->GetOutputOffset()[0]); +} + +TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var_not_found) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeGraph(graph, VARIABLE); + + ge::ComputeGraphPtr sub_graph = make_shared(""); + 
MakeReuseGraph(sub_graph); + graph->AddSubGraph(sub_graph); + + auto node_a = graph->FindNode("A"); + auto node_b = graph->FindNode("B"); + std::string value = "M"; + (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); + MemoryAssigner memory_assigner(graph); + map mem_offset; + size_t zero_memory_size = 0; + VarManager::Instance(0)->Init(0, 0, 0, 0); + EXPECT_NE(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); +} \ No newline at end of file diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 3487f8ed..d51de821 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -22,6 +22,7 @@ #include "graph/utils/graph_utils.h" #include "common/profiling/profiling_manager.h" #include "graph/load/model_manager/davinci_model.h" +#include "graph/manager/graph_var_manager.h" using namespace std; @@ -51,6 +52,10 @@ int32_t MsprofReport(uint32_t moduleId, uint32_t type, void *data, uint32_t len) TEST_F(UtestDavinciModel, init_success) { DavinciModel model(0, nullptr); + VarManager::Instance(0)->Init(0, 0, 0, 0); + map options; + options[GRAPH_MEMORY_MAX_SIZE] = "1048576"; + VarManager::Instance(0)->SetMemoryMallocSize(options); ComputeGraphPtr graph = make_shared("default"); ProfilingManager::Instance().is_load_profiling_ = true; @@ -777,6 +782,10 @@ TEST_F(UtestDavinciModel, init_data_aipp_input_dims_normal) { // test label_set_task Init TEST_F(UtestDavinciModel, label_task_success) { + VarManager::Instance(0)->Init(0, 0, 0, 0); + map options; + options[GRAPH_MEMORY_MAX_SIZE] = "1048576"; + VarManager::Instance(0)->SetMemoryMallocSize(options); DavinciModel model(0, nullptr); ComputeGraphPtr graph = make_shared("default"); @@ -944,6 +953,11 @@ TEST_F(UtestDavinciModel, simple_test_gmock) { } TEST_F(UtestDavinciModel, NnExecute) { + VarManager::Instance(0)->Init(0, 0, 0, 
0); + map options; + options[GRAPH_MEMORY_MAX_SIZE] = "1048576"; + VarManager::Instance(0)->SetMemoryMallocSize(options); + DavinciModel model(0, nullptr); ComputeGraphPtr graph = make_shared("default"); ProfilingManager::Instance().is_load_profiling_ = true; @@ -967,6 +981,26 @@ TEST_F(UtestDavinciModel, NnExecute) { NodePtr node = graph->AddNode(op_desc); // op_index = 0 } + { + OpDescPtr op_desc = CreateOpDesc("memcpy", MEMCPYASYNC); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({5120}); + NodePtr node = graph->AddNode(op_desc); + + domi::TaskDef *task_def = model_task_def->add_task(); + task_def->set_stream_id(0); + task_def->set_type(RT_MODEL_TASK_MEMCPY_ASYNC); + domi::MemcpyAsyncDef *memcpy_async = task_def->mutable_memcpy_async(); + memcpy_async->set_src(1024); + memcpy_async->set_dst(5120); + memcpy_async->set_dst_max(512); + memcpy_async->set_count(1); + memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE); + memcpy_async->set_op_index(op_desc->GetId()); + } + { OpDescPtr op_desc = CreateOpDesc("output", NETOUTPUT); op_desc->AddInputDesc(tensor); diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index 6a0e974e..6bc5cb9d 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -375,3 +375,53 @@ TEST_F(UtestGeHybrid, TestTaskContext) { ASSERT_EQ(task_context->GetInputDesc(1, new_desc), SUCCESS); ASSERT_EQ(new_desc.GetShape().GetDims(), new_shape.GetDims()); } + +TEST_F(UtestGeHybrid, unfold_subgraphs_success) { + ComputeGraphPtr merged_graph = nullptr; + + ComputeGraphPtr sub_sub_graph1 = std::make_shared("while_cond"); + OpDescPtr sub_sub_graph_while_cond_data_op_desc = CreateOpDesc("cond_data", DATA); + NodePtr sub_sub_graph_while_cond_data_node = sub_sub_graph1->AddNode(sub_sub_graph_while_cond_data_op_desc); + + ComputeGraphPtr sub_sub_graph2 = std::make_shared("while_body"); + 
/*OpDescPtr sub_sub_graph_while_body_const_op_desc = CreateOpDesc("body_const", CONSTANT); + NodePtr sub_sub_graph_while_body_const_node = sub_sub_graph2->AddNode(sub_sub_graph_while_body_const_op_desc);*/ + OpDescPtr sub_sub_graph_while_body_data_op_desc = CreateOpDesc("body_data", DATA); + NodePtr sub_sub_graph_while_body_data_node = sub_sub_graph2->AddNode(sub_sub_graph_while_body_data_op_desc); + sub_sub_graph2->SetGraphUnknownFlag(true); + /*OpDescPtr sub_sub_graph_while_body_add_op_desc = CreateOpDesc("body_add", ADD); + NodePtr sub_sub_graph_while_body_add_node = sub_sub_graph2->AddNode(sub_sub_graph_while_body_add_node); + sub_sub_graph_while_body_add_node->AddLinkFrom(sub_sub_graph_while_body_data_node); + sub_sub_graph_while_body_add_node->AddLinkFrom(sub_sub_graph_while_body_const_node);*/ + + ComputeGraphPtr sub_graph = std::make_shared("sub_graph"); + OpDescPtr sub_graph_while_op_desc = CreateOpDesc("while", WHILE); + NodePtr sub_graph_while_node = sub_graph->AddNode(sub_graph_while_op_desc); + sub_graph->SetGraphUnknownFlag(true); + sub_graph_while_node->GetOpDesc()->AddSubgraphName("while_cond"); + sub_graph_while_node->GetOpDesc()->AddSubgraphName("while_body"); + sub_graph_while_node->GetOpDesc()->SetSubgraphInstanceName(0, "while_cond"); + sub_graph_while_node->GetOpDesc()->SetSubgraphInstanceName(1, "while_body"); + + ComputeGraphPtr root_graph = std::make_shared("root_graph"); + auto partitioned_call_op_desc = MakeShared("partitioned_call", PARTITIONEDCALL); + auto partitioned_call_node = root_graph->AddNode(partitioned_call_op_desc); + partitioned_call_node->GetOpDesc()->AddSubgraphName("sub_graph"); + partitioned_call_node->GetOpDesc()->SetSubgraphInstanceName(0, "sub_graph"); + + root_graph->AddSubGraph(sub_sub_graph1); + root_graph->AddSubGraph(sub_sub_graph2); + sub_sub_graph1->SetParentGraph(root_graph); + sub_sub_graph2->SetParentGraph(root_graph); + sub_sub_graph1->SetParentNode(sub_graph_while_node); + 
sub_sub_graph2->SetParentNode(sub_graph_while_node); + + root_graph->AddSubGraph(sub_graph); + sub_graph->SetParentNode(partitioned_call_node); + sub_graph->SetParentGraph(root_graph); + + GeRootModelPtr root_model = MakeShared(root_graph); + HybridModel hybrid_model(root_model); + HybridModelBuilder hybrid_model_builder(hybrid_model); + EXPECT_EQ(hybrid_model_builder.UnfoldSubgraphs(root_graph, merged_graph), SUCCESS); +}