diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc index 35b6ebd06c..c8d484b81f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc @@ -66,6 +66,8 @@ #include "pre_activate/ascend/format_type/check_consistency.h" #include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" #include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h" #include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" #include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h" #include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" @@ -365,6 +367,7 @@ void AscendBackendUBFusionOptimization(const std::shared_ptr("ub_fusion_pm"); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); @@ -373,6 +376,7 @@ void AscendBackendUBFusionOptimization(const std::shared_ptrAddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator)); ub_fusion_pm->AddPass(std::make_shared()); diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc index 3f5dd98112..7925462f52 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc @@ -49,6 +49,19 @@ bool FusionBasePass::CheckDoubleInEltWiseNode(FuncGraphManager *manager, const A cnode->inputs().size() == ELTWISE_DOUBLE_IN_INPUT_SIZE; } +bool FusionBasePass::CheckMultiOutputEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(manager); + if (!node->isa() || !AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node)) { + return false; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto user_nodes = manager->node_users()[node]; + return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_nodes.size() == ELTWISE_MULTI_USE && + cnode->inputs().size() == ELTWISE_INPUT_SIZE; +} + void FusionBasePass::SetRecordFusionId(const std::unordered_set &record) { auto id = fusion_id_allocator->AllocateFusionId(); for (auto node : record) { diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h index 421efa9716..8d6eca774c 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h @@ -33,8 +33,12 @@ const int8_t MAX_ELTWISE_NUM = 3; const int8_t MIN_ELTWISE_SIZE = 2; const int8_t ELTWISE_INPUT_SIZE = 2; const int8_t ELTWISE_DOUBLE_IN_INPUT_SIZE = 3; +const int8_t CONV_DOUBLE_IN_INPUT_SIZE = 3; +const int8_t CONV_QUART_IN_INPUT_SIZE = 5; const int8_t ELTWISE_USE = 1; +const int8_t ELTWISE_MULTI_USE = 2; const int8_t MAX_ELTWISE_SIZE = 6; +const int8_t MULTI_ELTWISE_SIZE = 4; using FusedNodeRecord = std::vector>; struct BufferFusionInfo_t { @@ -58,6 +62,7 @@ class FusionBasePass : public Pass { void SetRecordFusionId(const std::unordered_set &record); bool CheckEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); bool CheckDoubleInEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); + bool CheckMultiOutputEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); FusionIdAllocatorPtr fusion_id_allocator; }; } // namespace opt diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc new file mode 100644 index 0000000000..01f36d4047 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.cc @@ -0,0 +1,81 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h" +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void MultiOutputFusionPass::MatchMultiOutputEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + std::unordered_set record{cnode}; + auto eltwise_input = cnode->input(1); + if (CheckMultiOutputEltWiseNode(manager.get(), eltwise_input)) { + (void)record.insert(eltwise_input); + auto input_cnode = eltwise_input->cast(); + MS_EXCEPTION_IF_NULL(input_cnode); + eltwise_input = input_cnode->input(1); + } else { + return; + } + while (CheckEltWiseNode(manager.get(), eltwise_input)) { + (void)record.insert(eltwise_input); + if (record.size() == MULTI_ELTWISE_SIZE) { + break; + } + auto input_cnode = eltwise_input->cast(); + MS_EXCEPTION_IF_NULL(input_cnode); + eltwise_input = input_cnode->input(1); + } + if (record.size() != MULTI_ELTWISE_SIZE) { + return; + } + candidate_fusion->push_back(record); + SetRecordFusionId(record); +} + +void MultiOutputFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + std::reverse(node_list.begin(), node_list.end()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { + MatchMultiOutputEltwise(cnode, kernel_graph, candidate_fusion); + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h new file mode 100644 index 0000000000..0e2510128a --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MULTI_OUTPUT_FUSION_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MULTI_OUTPUT_FUSION_PASS_H_ + +#include +#include + +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class MultiOutputFusionPass : public FusionBasePass { + public: + explicit MultiOutputFusionPass(FusionIdAllocatorPtr idAllocator) + : FusionBasePass("MultiOutputFusionPass", idAllocator) {} + ~MultiOutputFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; + + private: + void MatchMultiOutputEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion); +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MULTI_OUTPUT_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc new file mode 100644 index 0000000000..eb23b3d9cf --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.cc @@ -0,0 +1,90 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h" + +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void StridedReadConvStridedWriteFusionPass::MatchStridedReadConvStridedWrite(const CNodePtr &cnode, + const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + std::unordered_set record{cnode}; + auto write_input = cnode->input(1); + + if (CheckEltWiseNode(manager.get(), write_input)) { + (void)record.insert(write_input); + auto input_cnode = write_input->cast(); + MS_EXCEPTION_IF_NULL(input_cnode); + write_input = input_cnode->input(1); + } + + if (!write_input->isa() || !AnfAlgo::IsRealCNodeKernel(write_input) || + fusion_id_allocator->HasFusionIdAttr(write_input)) { + return; + } + auto conv_cnode = write_input->cast(); + MS_EXCEPTION_IF_NULL(conv_cnode); + if (AnfAlgo::GetKernelType(conv_cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(conv_cnode) == kernel::FusionType::CONVLUTION && + conv_cnode->inputs().size() >= CONV_DOUBLE_IN_INPUT_SIZE && + conv_cnode->inputs().size() <= CONV_QUART_IN_INPUT_SIZE) { + (void)record.insert(write_input); + auto conv_input = conv_cnode->input(1); + if (!conv_input->isa() || !AnfAlgo::IsRealCNodeKernel(conv_input) || + fusion_id_allocator->HasFusionIdAttr(conv_input)) { + return; + } + + if (AnfAlgo::GetCNodeName(conv_input) == kStridedReadOpName) { + (void)record.insert(conv_input); + candidate_fusion->push_back(record); + SetRecordFusionId(record); + } + } +} + +void StridedReadConvStridedWriteFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetCNodeName(cnode) == kStridedWriteOpName) { + MatchStridedReadConvStridedWrite(cnode, kernel_graph, candidate_fusion); + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h new file mode 100644 index 0000000000..c6c5fe88dc --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_STRIDEDREAD_CONV_STRIDEDWRITE_FUSION_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_STRIDEDREAD_CONV_STRIDEDWRITE_FUSION_PASS_H_ + +#include +#include + +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class StridedReadConvStridedWriteFusionPass : public FusionBasePass { + public: + explicit StridedReadConvStridedWriteFusionPass(FusionIdAllocatorPtr idAllocator) + : FusionBasePass("StridedReadConvStridedWriteFusionPass", idAllocator) {} + ~StridedReadConvStridedWriteFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; + + private: + void MatchStridedReadConvStridedWrite(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion); +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_STRIDEDREAD_CONV_STRIDEDWRITE_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h index 2ea3e7ba94..ff2ba05c84 100644 --- a/mindspore/ccsrc/utils/utils.h +++ b/mindspore/ccsrc/utils/utils.h @@ -154,6 +154,8 @@ constexpr auto kLarsV2UpdateOpName = "LarsV2Update"; constexpr auto kSquareSumAllOpName = "SquareSumAll"; constexpr auto kNMSWithMaskOpName = "NMSWithMask"; constexpr auto kSoftmaxGradExtOpName = "SoftmaxGradExt"; +constexpr auto kStridedReadOpName = "StridedRead"; +constexpr auto kStridedWriteOpName = "StridedWrite"; // attr key name constexpr auto kAttrInputNames = "input_names";