diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 3acd4a7f..9dc0cf73 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -551,31 +551,11 @@ void GetMaxBatchAllMemorySize(std::map> &batch_all_ } } -void BlockMemAssigner::MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node) { - auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, return); - // if input size just one and from variable, no need to reassign continuous memory - bool is_input_continuous = false; - (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - if (is_input_continuous && (node_op_desc->GetInputsSize() == 1)) { - auto peer_out_anchor = node->GetInDataAnchor(0)->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, return); - auto in_node = peer_out_anchor->GetOwnerNode(); - GE_IF_BOOL_EXEC(in_node == nullptr, return); - if (in_node->GetType() == VARIABLE || in_node->GetType() == CONSTANT) { - GELOGI("node only one input and from variable, set continuous alloced. 
node_name:%s", node->GetName().c_str()); - (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); - } - } -} - void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { vector temp; std::map> batch_all_memory_size; std::map batch_total_size; for (const NodePtr &n : compute_graph_->GetAllNodes()) { - MarkContinuousAllocedForOneInputFromVariable(n); - auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); @@ -1081,53 +1061,18 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, return block; } -void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, - const NodePtr &n) { - const auto node_op_desc = n->GetOpDesc(); - for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { - int32_t reuse_in_index = -1; - if (!GraphUtils::IsRefFromInput(n->GetOutDataAnchor(index), reuse_in_index)) { - isAllOutputRef = false; - break; - } else { - zero_memory_list_.emplace_back(n, kOutput, index); - isOutputHasRef = true; - } - } -} - - -Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector &ranges, +MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); auto node_op_desc = n->GetOpDesc(); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); - - // continuous output support ref only when all output ref input - bool isAllOutputRef = true; - bool isOutputHasRef = false; - - ContinuousOutRefCheck(isAllOutputRef, isOutputHasRef, n); - - if (isAllOutputRef) { - GELOGI("continuous output node ref all input, skip continuous alloc, node_name:%s", n->GetName().c_str()); - return SUCCESS; - } - - if 
(!isAllOutputRef && isOutputHasRef) { - GELOGE(INTERNAL_ERROR, "continuous output node ref part input, not support this situation, node_name:%s", - n->GetName().c_str()); - return INTERNAL_ERROR; - } - + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); MemoryBlock *block = nullptr; int64_t total_size = 0; int64_t memory_type = RT_MEMORY_HBM; for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { auto output_op_desc = node_op_desc->GetOutputDescPtr(index); if (output_op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); - return INTERNAL_ERROR; + return nullptr; } if (CheckIsZeroMemNodeType(n->GetType())) { @@ -1137,8 +1082,8 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vectorGetName().c_str(), index); - return INTERNAL_ERROR; + GELOGI("Get size failed"); + return nullptr; } size_t align_size = static_cast(size); AlignMemOffset(align_size); @@ -1161,7 +1106,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vectorfirst_continuous_block_ = true; block->last_continuous_block_ = true; - } else { - GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. 
node_name:%s", n->GetName().c_str()); - return INTERNAL_ERROR; } - return SUCCESS; + return block; } MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector &ranges, @@ -1191,8 +1133,9 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, NodeIndexIO node_index_io(n, index, kOut); int64_t size = 0; auto output_op_desc = node_op_desc->GetOutputDescPtr(index); - GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); + if (output_op_desc != nullptr) { + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); + } size_t no_align_size = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, return nullptr, "Get no align size failed"); @@ -1203,14 +1146,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); block->ref_count_++; } else { - // if ref input is variable, can not find symbol, must judge alone - int32_t reuse_in_index = -1; - if (GraphUtils::IsRefFromInput(n->GetOutDataAnchor(index), reuse_in_index)) { - zero_memory_list_.emplace_back(n, kOutput, index, false); - GELOGI("ref mode skip out block assign. 
node_name: %s, index:%d", n->GetName().c_str(), index); - return nullptr; - } - int64_t max_size = size; int64_t memory_type = RT_MEMORY_HBM; auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); @@ -1458,7 +1393,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); if (IsContinuousOutput(node)) { - return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); + (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); + return SUCCESS; } for (uint32_t i = 0; i < static_cast(op_desc->GetOutputsSize()); i++) { int64_t size = 0; @@ -1952,8 +1888,9 @@ Status BlockMemAssigner::Assign() { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || - (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || - (node_type == ASSIGN) || (node_type == HVDWAIT); + (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || + (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || + (node_type == HVDCALLBACKBROADCAST); } bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 7e76081d..d514ca34 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -420,11 +420,7 @@ class BlockMemAssigner : public MemAssigner { bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); - void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); - - Status ApplyContinuousMemory(const NodePtr &n, const vector &ranges, 
const bool is_op_reuse_mem); - - void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); + MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem); std::unordered_map>> reusable_blocks_; diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index a367d334..1d465441 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2099,6 +2099,12 @@ Status DavinciModel::SyncVarData() { RT_MEMCPY_HOST_TO_DEVICE)); } + for (auto op_desc : variable_op_list_) { + ret = + VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, + op_desc->GetName().c_str()); + } return ret; } @@ -2571,6 +2577,12 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b /// Status DavinciModel::ReturnNoOutput(uint32_t data_id) { GELOGI("ReturnNoOutput model id:%u", model_id_); + for (auto op_desc : variable_op_list_) { + Status ret = VarManager::Instance(session_id_) + ->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, + op_desc->GetName().c_str()); + } GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); std::vector outputs; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 030b864e..beb7cd42 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -93,7 +93,6 @@ #include "graph/passes/unused_args_clean_pass.h" #include "graph/passes/global_step_insert_pass.h" #include "graph/passes/memcpy_addr_async_pass.h" -#include "graph/passes/hccl_memcpy_pass.h" #include 
"graph/build/label_allocator.h" #include "graph/utils/tensor_adapter.h" #include "inc/pass_manager.h" @@ -2122,8 +2121,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { new (std::nothrow) TransOpWithoutReshapeFusionPass)) GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass", new (std::nothrow) TransOpBreadthFusionPass)) - GE_CHK_STATUS_RET( - after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass)); GE_TIMESTAMP_START(after_merge_passes); auto ret = after_merge_passes.Run(compute_graph); diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index a67b917f..21747f42 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -28,157 +28,50 @@ namespace { const int32_t kAnchorSize = 1; const int kAnchorNum = 0; -const int32_t kAnchorAssignRefIndex = 0; -const int32_t kAnchorAssignValueIndex = 1; const char *const kInputMutable = "_input_mutable"; } // namespace namespace ge { Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { - Status ret = SUCCESS; GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID); for (const auto &node : graph->GetDirectNode()) { auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str()); - return INTERNAL_ERROR; - } - - ret = ContinuousInputProcess(graph, node); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "failed ProcessBroadcastMemcpy, node_name:%s.", node->GetName().c_str()); - return ret; - } - - ret = MutableInputProcess(graph, node); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "failed MutableInputProcess, node_name:%s.", node->GetName().c_str()); - return ret; - } - - ret = P2pmemInputProcess(graph, node); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", 
node->GetName().c_str()); - return ret; - } - - } - return ret; -} - -// If node has _input_mutable attr, means input mem may be modified when op execute. -// In order to avoid to affect another op execute with same input when data modified, -// need to inset memcpy node between. -// also works on situation that input is variable or const. -Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { - auto op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(op_desc == nullptr, continue); - bool node_input_mutable = false; - if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { - return SUCCESS; - } - - if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) { - GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); - return FAILED; - } - if (!node_input_mutable) { - return SUCCESS; - } - - GELOGI("input mutable hcom op is:%s.", op_desc->GetName().c_str()); - for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { - if (hccl_in_anchor == nullptr) { + bool node_input_mutable = false; + if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { continue; } - auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(src_out_anchor); - int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); - if (src_out_anchor_size == kAnchorSize) { - // Identity needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. 
- if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; - } - } + GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable), + GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED); + if (!node_input_mutable) { continue; } - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; - } - } - return SUCCESS; -} - -// If broadcast input size is bigger than 1, and input from variable, -// cause by broadcast input memory should be continuous, -// another featuremap mem will be allocated for broadcast input. -// In this condition, move data from variable mem to broadcast input featuremap mem will be executed each step. -// In order to avoid move action out of model, use memcpy node instead of move action code. 
-Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { - auto op_desc = node->GetOpDesc(); - - bool is_input_continuous = false; - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - - if (is_input_continuous && op_desc->GetInputsSize() > 1) { - GELOGI("continuous input op is:%s.", op_desc->GetName().c_str()); - // if input size bigger than one, insert memcpy between var data for support continous mem alloc + GELOGI("hcom op is:%s.", op_desc->GetName().c_str()); for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { if (hccl_in_anchor == nullptr) { continue; } auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - if (src_out_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - - if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; + GE_CHECK_NOTNULL(src_out_anchor); + + int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); + if (src_out_anchor_size == kAnchorSize) { + // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. 
+ NodePtr src_node = src_out_anchor->GetOwnerNode(); + std::string src_type = src_node->GetType(); + bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT); + if (check_src_type) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; + } } + continue; } - } - } - return SUCCESS; -} - -// if input is var type, and node input need p2p mem, then memcpy should be insert between the two -Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { - auto op_desc = node->GetOpDesc(); - - vector input_memory_types; - (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types); - if (input_memory_types.empty()) { - return SUCCESS; - } - - for (uint32_t index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) { - if (input_memory_types[index] != RT_MEMORY_P2P_DDR) { - continue; - } - - GELOGD("p2p input op is:%s.", op_desc->GetName().c_str()); - auto hccl_in_anchor = node->GetInDataAnchor(index); - if (hccl_in_anchor == nullptr) { - continue; - } - auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - if (src_out_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - - if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); if (ret != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); @@ -189,12 +82,8 @@ Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const No return SUCCESS; } -bool HcclMemcpyPass::IsDataNode(const std::string& node_type) { - return (node_type == CONSTANTOP) || (node_type == VARIABLE) || (node_type == DATA) || (node_type == CONSTANT); -} - /// -/// @brief Add Identity Node 
+/// @brief Add MemcpyAsync Node /// @param [in] ge::ComputeGraphPtr graph /// @param [in] ge::OutDataAnchorPtr in_node /// @return ge::NodePtr @@ -212,20 +101,20 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O node_name = CheckDuplicateName(node_name); OpDescPtr op_desc = MakeShared(node_name.c_str(), IDENTITY); if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Create Identity op: MakeShared op_desc fail."); + GELOGE(INTERNAL_ERROR, "Create identity op: MakeShared op_desc fail."); return nullptr; } - GELOGI("Create Identity op:%s.", op_desc->GetName().c_str()); + GELOGI("Create identity op:%s.", op_desc->GetName().c_str()); graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create Identity op: add input desc fail."); + GELOGE(INTERNAL_ERROR, "Create identity op: add input desc fail."); return nullptr; } ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create Identity op: add output desc fail."); + GELOGE(INTERNAL_ERROR, "Create identity op: add output desc fail."); return nullptr; } // because history reason ,this pass can not do work after constant fold so mark it @@ -233,7 +122,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O NodePtr memcpy_node = graph->AddNode(op_desc); if (memcpy_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); + GELOGE(INTERNAL_ERROR, "Insert identity node fail."); return nullptr; } @@ -266,38 +155,7 @@ std::string HcclMemcpyPass::CheckDuplicateName(const std::string &node_name) { /// Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor) { - GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); - GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); - - 
Status ret = InsertIdentityBeforeHccl(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "add identity failed, var_node:%s, hccl_node:%s.", - src_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - return ret; - } - - ret = InsertAssignAfterBroadcastIfNeed(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "add assign failed, var_node:%s, hccl_node:%s.", - src_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - return ret; - } - return SUCCESS; -} - -/// -/// @brief Insert Identity node Between Hccl node and variable -/// @param [in] ComputeGraphPtr graph -/// @param [in] OutDataAnchorPtr src_out_anchor -/// @param [in] InDataAnchorPtr hccl_in_anchor -/// @return status -/// -Status HcclMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, - const InDataAnchorPtr &hccl_in_anchor) { - GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + GELOGI("The op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str()); NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); GE_CHECK_NOTNULL(memcpy_node); @@ -324,139 +182,6 @@ Status HcclMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, co } return SUCCESS; } - -/// -/// @brief Insert assign node after broadcast node and variable to refresh variable data -/// @param [in] ComputeGraphPtr graph -/// @param [in] OutDataAnchorPtr var_out_anchor -/// @param [in] InDataAnchorPtr hccl_in_anchor -/// @return status -/// -Status HcclMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, - const OutDataAnchorPtr &var_out_anchor, - const InDataAnchorPtr &hccl_in_anchor) { - if (hccl_in_anchor->GetOwnerNode()->GetType() != 
HCOMBROADCAST) { - GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - return SUCCESS; - } - - if (var_out_anchor->GetOwnerNode()->GetType() != VARIABLE) { - GELOGD("%s not variable, no need to insert assign node", var_out_anchor->GetOwnerNode()->GetName().c_str()); - return SUCCESS; - } - - GELOGI("after op %s and op %s need insert assign op.", var_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - - for (auto peer_in_anchor : var_out_anchor->GetPeerInDataAnchors()) { - if (peer_in_anchor->GetOwnerNode()->GetType() == ASSIGN) { - GELOGD("variable %s out assign node is exist.", var_out_anchor->GetOwnerNode()->GetName().c_str()); - return SUCCESS; - } - } - - NodePtr assign_node = CreateAssignNode(graph, var_out_anchor); - GE_CHECK_NOTNULL(assign_node); - - OutDataAnchorPtr hccl_out_anchor = hccl_in_anchor->GetOwnerNode()->GetOutDataAnchor(hccl_in_anchor->GetIdx()); - GE_CHECK_NOTNULL(hccl_out_anchor); - - Status ret = hccl_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignValueIndex)); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link anchor %s fail.", hccl_out_anchor->GetOwnerNode()->GetName().c_str(), - assign_node->GetName().c_str()); - return FAILED; - } - - ret = var_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignRefIndex)); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link anchor %s fail.", var_out_anchor->GetOwnerNode()->GetName().c_str(), - assign_node->GetName().c_str()); - return FAILED; - } - - // add control edge between assign node and node after broadcast node - OutControlAnchorPtr assign_out_control_anchor = assign_node->GetOutControlAnchor(); - GE_CHECK_NOTNULL(assign_out_control_anchor); - - for (auto in_data_anchor : hccl_out_anchor->GetPeerInDataAnchors()) { - if (in_data_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { - continue; - } - ret = 
assign_out_control_anchor->LinkTo(in_data_anchor->GetOwnerNode()->GetInControlAnchor()); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link control anchor %s fail.", assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), - in_data_anchor->GetOwnerNode()->GetName().c_str()); - return FAILED; - } - } - - for (auto in_control_anchor : hccl_out_anchor->GetOwnerNode()->GetOutControlAnchor()->GetPeerInControlAnchors()) { - if (in_control_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { - continue; - } - ret = assign_out_control_anchor->LinkTo(in_control_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link control anchor %s fail.", assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), - in_control_anchor->GetOwnerNode()->GetName().c_str()); - return FAILED; - } - } - return SUCCESS; -} - -/// -/// @brief create assign Node, add to graph -/// @param [in] ge::ComputeGraphPtr graph -/// @param [in] ge::OutDataAnchorPtr variable node out anchor -/// @return ge::NodePtr -/// -NodePtr HcclMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { - GE_IF_BOOL_EXEC(graph == nullptr, return nullptr); - NodePtr pre_node = out_data_anchor->GetOwnerNode(); - OpDescPtr pre_op_desc = pre_node->GetOpDesc(); - if (pre_op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "OpDesc of pre node is invalid."); - return nullptr; - } - - std::string node_name = pre_node->GetName() + "_" + ASSIGN; - node_name = CheckDuplicateName(node_name); - OpDescPtr op_desc = MakeShared(node_name.c_str(), ASSIGN); - if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Create Assign op: MakeShared op_desc fail."); - return nullptr; - } - GELOGI("Create Assign op:%s.", op_desc->GetName().c_str()); - - graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail."); - return 
nullptr; - } - - ret = op_desc->AddInputDesc("value", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create Assign op: add value input desc fail."); - return nullptr; - } - - ret = op_desc->AddOutputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create Assign op: add output desc fail."); - return nullptr; - } - - NodePtr assign_node = graph->AddNode(op_desc); - if (assign_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); - return nullptr; - } - - return assign_node; -} - - /// /// @brief Clear Status, used for subgraph pass /// @return SUCCESS diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index 7e52708a..e73a5483 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -32,28 +32,11 @@ class HcclMemcpyPass : public GraphPass { private: NodePtr CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); - NodePtr CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor); - std::string CheckDuplicateName(const std::string &node_name); Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor); - Status InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, - const InDataAnchorPtr &hccl_in_anchor); - - Status InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, - const OutDataAnchorPtr &src_out_anchor, - const InDataAnchorPtr &hccl_in_anchor); - - Status ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node); - - Status MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node); - - Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node); - - bool IsDataNode(const std::string& node_type); - 
std::unordered_map node_num_map_; }; } // namespace ge diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 32f877cf..6bb3105c 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -51,6 +51,7 @@ #include "graph/passes/for_pass.h" #include "graph/passes/guarantee_const_pass.h" #include "graph/passes/hccl_group_pass.h" +#include "graph/passes/hccl_memcpy_pass.h" #include "graph/passes/identity_pass.h" #include "graph/passes/infershape_pass.h" #include "graph/passes/net_output_pass.h" @@ -1732,6 +1733,8 @@ Status GraphPrepare::PrepareOptimize() { PassManager graph_pass; try { (void)graph_pass.AddPass("PrepareOptimize::PrunePass", new PrunePass); + // TODO: temporarily run the hccl memcpy insertion during graph preparation, to prevent duplicated memcpy insertion + (void)graph_pass.AddPass("PrepareOptimize::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass); } catch (std::bad_alloc &e) { GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); return INTERNAL_ERROR; diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 3cd0455d..eda3cb15 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -282,4 +282,41 @@ TEST_F(UtestDavinciModel, init_unknown) { const vector outputs = { &virtual_addr }; EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); } + +TEST_F(UtestDavinciModel, ReturnNoOutput_test) { + DavinciModel model(0, nullptr); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr var1 = CreateOpDesc("var1", VARIABLE); + var1->AddInputDesc(tensor); + var1->AddOutputDesc(tensor); + var1->SetInputOffset({1024}); + var1->SetOutputOffset({1024}); + + model.variable_op_list_.push_back(var1); + + + EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); +} + +TEST_F(UtestDavinciModel, SyncVarData_test) { + DavinciModel model(0, nullptr); + + 
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr var1 = CreateOpDesc("var1", VARIABLE); + var1->AddInputDesc(tensor); + var1->AddOutputDesc(tensor); + var1->SetInputOffset({1024}); + var1->SetOutputOffset({1024}); + + model.variable_op_list_.push_back(var1); + + EXPECT_NE(model.SyncVarData(), SUCCESS); +} + + } // namespace ge