From 033bdf7262284d73515b82ab8e54b572d6d16d95 Mon Sep 17 00:00:00 2001 From: medivh-x Date: Mon, 29 Mar 2021 21:42:45 +0800 Subject: [PATCH] add hccl tailing optimization pass --- ge/CMakeLists.txt | 2 + ge/graph/manager/graph_manager.cc | 9 +++ .../passes/hccl_tailing_optimization_pass.cc | 72 +++++++++++++++++++ .../passes/hccl_tailing_optimization_pass.h | 34 +++++++++ tests/ut/ge/CMakeLists.txt | 1 + 5 files changed, 118 insertions(+) create mode 100644 ge/graph/passes/hccl_tailing_optimization_pass.cc create mode 100644 ge/graph/passes/hccl_tailing_optimization_pass.h diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index bd9edd86..78291224 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -215,6 +215,7 @@ set(TRAIN_SRC_LIST "graph/passes/dimension_compute_pass.cc" "graph/passes/dropout_pass.cc" "graph/passes/hccl_group_pass.cc" + "graph/passes/hccl_tailing_optimization_pass.cc" "graph/passes/enter_pass.cc" "graph/passes/assign_remove_pass.cc" "graph/passes/inplace_support_check_pass.cc" @@ -612,6 +613,7 @@ set(INFER_SRC_LIST "graph/passes/link_gen_mask_nodes_pass.cc" "graph/passes/replace_with_empty_const_pass.cc" "graph/passes/hccl_group_pass.cc" + "graph/passes/hccl_tailing_optimization_pass.cc" "graph/passes/memcpy_addr_async_pass.cc" "graph/passes/set_input_output_offset_pass.cc" "graph/passes/parallel_group_pass.cc" diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 50112c2d..806b0e57 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -55,6 +55,7 @@ #include "graph/passes/dimension_compute_pass.h" #include "graph/passes/flow_ctrl_pass.h" #include "graph/passes/fuse_data_nodes_with_common_input_pass.h" +#include "graph/passes/hccl_tailing_optimization_pass.h" #include "graph/passes/identity_pass.h" #include "graph/passes/input_output_connection_identify_pass.h" #include "graph/passes/iterator_op_pass.h" @@ -2252,6 +2253,14 @@ Status 
GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
     // Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory
     GE_CHK_STATUS_RET(
         graph_pass.AddPass("OptimizeStage1_3::GlobalStepInsertPass", new (std::nothrow) GlobalStepInsertPass))
+
+    std::string hccl_tailing_optimize;
+    if (GetContext().GetOption("ge.exec.hccl_tailing_optimize", hccl_tailing_optimize) == SUCCESS &&
+        hccl_tailing_optimize == "1") {
+      GELOGI("Add hccl tailing optimize stage");
+      GE_CHK_STATUS_RET(
+          graph_pass.AddPass("OptimizeStage1_3::HcclTailingOptimizationPass", new (std::nothrow) HcclTailingOptimizationPass))
+    }
   }
   GE_TIMESTAMP_START(graph_pass);
   ret = graph_pass.Run(compute_graph);
diff --git a/ge/graph/passes/hccl_tailing_optimization_pass.cc b/ge/graph/passes/hccl_tailing_optimization_pass.cc
new file mode 100644
index 00000000..a1bdb2d1
--- /dev/null
+++ b/ge/graph/passes/hccl_tailing_optimization_pass.cc
@@ -0,0 +1,109 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "hccl_tailing_optimization_pass.h"
+
+#include <set>
+#include <vector>
+
+#include "graph/common/transop_util.h"
+
+namespace ge {
+/**
+ * Scans the direct nodes of `graph` for HCOMALLREDUCE nodes and calls CopyControlEdgesForTransOp on every
+ * trans op that consumes one of their data outputs.
+ *
+ * @param graph graph to scan; checked with GE_CHECK_NOTNULL before use
+ * @return SUCCESS on completion, otherwise whatever status the GE check macros return on failure
+ */
+Status HcclTailingOptimizationPass::Run(ComputeGraphPtr graph) {
+  GE_CHECK_NOTNULL(graph);
+  for (const auto &node : graph->GetDirectNode()) {
+    GE_CHECK_NOTNULL(node);
+    if (node->GetType() != HCOMALLREDUCE) {
+      continue;
+    }
+    for (auto &out_node : node->GetOutDataNodes()) {
+      if (!TransOpUtil::IsTransOp(out_node)) {
+        continue;
+      }
+
+      GE_CHK_STATUS_RET_NOLOG(CopyControlEdgesForTransOp(out_node));
+    }
+  }
+  return SUCCESS;
+}
+
+/**
+ * Gives `first_trans_op` the control dependencies of the nodes that consume its result.
+ *
+ * Starting at `first_trans_op`, data outputs are followed through consecutive trans ops. For every data
+ * consumer met on the way, the out-control anchors feeding its in-control anchor are collected; each one is
+ * then linked to the in-control anchor of `first_trans_op` unless it is already linked.
+ * Sources owned by a trans op of that chain are skipped, because such an edge would close a cycle.
+ *
+ * @param first_trans_op trans op that receives the additional control edges
+ * @return SUCCESS on completion, otherwise whatever status the GE check macros return on failure
+ */
+Status HcclTailingOptimizationPass::CopyControlEdgesForTransOp(NodePtr &first_trans_op) {
+  GE_CHECK_NOTNULL(first_trans_op);
+  auto dst_in_ctrl_anchor = first_trans_op->GetInControlAnchor();
+  GE_CHECK_NOTNULL(dst_in_ctrl_anchor);
+  std::set<OutControlAnchorPtr> src_out_ctrl_anchors;
+  std::vector<NodePtr> trans_op_nodes{first_trans_op};
+  // Trans ops that were already queued. A trans op reachable through several data paths is walked only
+  // once, and the walk terminates even if the data edges happen to form a loop.
+  std::set<NodePtr> visited_trans_ops{first_trans_op};
+
+  while (!trans_op_nodes.empty()) {
+    auto trans_op_node = trans_op_nodes.back();
+    trans_op_nodes.pop_back();
+
+    for (auto &next_node : trans_op_node->GetOutDataNodes()) {
+      GE_CHECK_NOTNULL(next_node);
+      auto in_ctrl_anchor = next_node->GetInControlAnchor();
+      GE_CHECK_NOTNULL(in_ctrl_anchor);
+
+      auto peer_out_ctrl_anchors = in_ctrl_anchor->GetPeerOutControlAnchors();
+
+      for (const auto &src_ctrl_anchor : peer_out_ctrl_anchors) {
+        // Check the anchor itself first: it is dereferenced on the next line.
+        GE_CHECK_NOTNULL(src_ctrl_anchor);
+        GE_CHECK_NOTNULL(src_ctrl_anchor->GetOwnerNode());
+        src_out_ctrl_anchors.emplace(src_ctrl_anchor);
+      }
+      // insert().second is false when the trans op was queued before.
+      if (TransOpUtil::IsTransOp(next_node) && visited_trans_ops.insert(next_node).second) {
+        trans_op_nodes.emplace_back(next_node);
+      }
+    }
+  }
+
+  for (auto &src_out_ctrl_anchor : src_out_ctrl_anchors) {
+    // Every node in visited_trans_ops is first_trans_op or data-reachable from it, so a control edge
+    // from such a node back to first_trans_op would be a self edge or close a cycle.
+    if (visited_trans_ops.count(src_out_ctrl_anchor->GetOwnerNode()) > 0) {
+      continue;
+    }
+    if (!src_out_ctrl_anchor->IsLinkedWith(dst_in_ctrl_anchor)) {
+      GE_CHK_GRAPH_STATUS_RET(
+          GraphUtils::AddEdge(src_out_ctrl_anchor, dst_in_ctrl_anchor), "Failed to add edge between %s->%s",
+          src_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), first_trans_op->GetName().c_str());
+    }
+  }
+
+  return SUCCESS;
+}
+}  // namespace ge
diff --git a/ge/graph/passes/hccl_tailing_optimization_pass.h b/ge/graph/passes/hccl_tailing_optimization_pass.h
new file mode 100644
index 00000000..3b6ccaea
--- /dev/null
+++ 
b/ge/graph/passes/hccl_tailing_optimization_pass.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef GE_GRAPH_PASSES_HCCL_TAILING_OPTIMIZATION_PASS_H_
+#define GE_GRAPH_PASSES_HCCL_TAILING_OPTIMIZATION_PASS_H_
+
+#include <string>
+
+#include "inc/graph_pass.h"
+
+namespace ge {
+/**
+ * Graph pass that looks for trans ops fed directly by HCOMALLREDUCE nodes and adds to each such trans op
+ * the control-edge inputs of the nodes consuming its output (following chains of consecutive trans ops).
+ */
+class HcclTailingOptimizationPass : public GraphPass {
+ public:
+  /**
+   * Runs the pass over the direct nodes of `graph`.
+   * @param graph graph whose direct nodes are scanned for HCOMALLREDUCE nodes
+   * @return SUCCESS on completion, otherwise the first error status encountered
+   */
+  Status Run(ComputeGraphPtr graph) override;
+
+ private:
+  /**
+   * Adds control edges to `first_trans_op` from the control-edge sources of its data consumers and of the
+   * data consumers of every trans op chained behind it.
+   * @param first_trans_op trans op that receives the additional control edges
+   * @return SUCCESS on completion, otherwise an error status (missing anchor or failed edge insertion)
+   */
+  Status CopyControlEdgesForTransOp(NodePtr &first_trans_op);
+};
+}  // namespace ge
+
+#endif  // GE_GRAPH_PASSES_HCCL_TAILING_OPTIMIZATION_PASS_H_
diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt
index cf60d1aa..994b9e76 100755
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -270,6 +270,7 @@ set(COMMON_SRC_FILES
     "${GE_CODE_DIR}/ge/graph/passes/link_gen_mask_nodes_pass.cc"
     "${GE_CODE_DIR}/ge/graph/passes/replace_with_empty_const_pass.cc"
     "${GE_CODE_DIR}/ge/graph/passes/hccl_group_pass.cc"
+    "${GE_CODE_DIR}/ge/graph/passes/hccl_tailing_optimization_pass.cc"
     "${GE_CODE_DIR}/ge/graph/passes/memcpy_addr_async_pass.cc"
     "${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc"
     "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc"