Add new buffer fusion passes (MultiOutputFusionPass and StridedReadConvStridedWriteFusionPass)

pull/1880/head
etone-chan 5 years ago
parent 7d7478d16d
commit 1525fae8b9

@ -65,6 +65,8 @@
#include "pre_activate/ascend/format_type/check_consistency.h"
#include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h"
#include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h"
@ -363,6 +365,7 @@ void AscendBackendUBFusionOptimization(const std::shared_ptr<session::KernelGrap
auto ub_fusion_pm = std::make_shared<PassManager>("ub_fusion_pm");
ub_fusion_pm->AddPass(std::make_shared<Conv2DBackpropEltwiseEltwiseFusionPass>(fusion_id_allocator));
ub_fusion_pm->AddPass(std::make_shared<Conv2DBackpropEltwiseFusionPass>(fusion_id_allocator));
ub_fusion_pm->AddPass(std::make_shared<StridedReadConvStridedWriteFusionPass>(fusion_id_allocator));
ub_fusion_pm->AddPass(std::make_shared<ConvBnReduceFusionPass>(fusion_id_allocator));
ub_fusion_pm->AddPass(std::make_shared<ConvSingleInFusionPass>(fusion_id_allocator));
ub_fusion_pm->AddPass(std::make_shared<BnupdateEltwiseFusionPass>(fusion_id_allocator));
@ -371,6 +374,7 @@ void AscendBackendUBFusionOptimization(const std::shared_ptr<session::KernelGrap
ub_fusion_pm->AddPass(std::make_shared<ConvDoubleInFusionPass>(fusion_id_allocator));
ub_fusion_pm->AddPass(std::make_shared<ReduceEltwiseFusionPass>(fusion_id_allocator));
ub_fusion_pm->AddPass(std::make_shared<SegmentEltwiseFusionPass>(fusion_id_allocator));
ub_fusion_pm->AddPass(std::make_shared<MultiOutputFusionPass>(fusion_id_allocator));
ub_fusion_pm->AddPass(std::make_shared<EltwiseFusionPass>(fusion_id_allocator));
ub_fusion_pm->AddPass(std::make_shared<DepthwiseConvEltwiseFusionPass>(fusion_id_allocator));
ub_fusion_pm->AddPass(std::make_shared<UbPatternFusion>());

@ -49,6 +49,19 @@ bool FusionBasePass::CheckDoubleInEltWiseNode(FuncGraphManager *manager, const A
cnode->inputs().size() == ELTWISE_DOUBLE_IN_INPUT_SIZE;
}
// Tells whether `node` can serve as the multi-output element-wise member of a fusion group.
//
// Returns true only when all of the following hold:
//   - `node` is a CNode, is a real CNode kernel, and carries no fusion id attribute yet;
//   - its kernel type is TBE_KERNEL and its fusion type is ELEMWISE;
//   - the manager records exactly ELTWISE_MULTI_USE users for it;
//   - its input list has exactly ELTWISE_INPUT_SIZE entries.
// A null `manager` or a null `node` is rejected through MS_EXCEPTION_IF_NULL.
bool FusionBasePass::CheckMultiOutputEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node) {
  MS_EXCEPTION_IF_NULL(manager);
  // `node` is dereferenced right below, so it has to be validated just like `manager`.
  MS_EXCEPTION_IF_NULL(node);
  if (!node->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node)) {
    return false;
  }
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  auto user_nodes = manager->node_users()[node];
  return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL &&
         AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_nodes.size() == ELTWISE_MULTI_USE &&
         cnode->inputs().size() == ELTWISE_INPUT_SIZE;
}
void FusionBasePass::SetRecordFusionId(const std::unordered_set<AnfNodePtr> &record) {
auto id = fusion_id_allocator->AllocateFusionId();
for (auto node : record) {

@ -33,8 +33,12 @@ const int8_t MAX_ELTWISE_NUM = 3;
// NOTE(review): no use of MIN_ELTWISE_SIZE is visible in this hunk — meaning to be confirmed.
const int8_t MIN_ELTWISE_SIZE = 2;
// Value of cnode->inputs().size() required for a node to be matched as a single-input eltwise kernel.
const int8_t ELTWISE_INPUT_SIZE = 2;
// Value of cnode->inputs().size() required by CheckDoubleInEltWiseNode.
const int8_t ELTWISE_DOUBLE_IN_INPUT_SIZE = 3;
// Inclusive lower / upper bound on conv_cnode->inputs().size() accepted by the
// StridedRead + Conv + StridedWrite matcher.
const int8_t CONV_DOUBLE_IN_INPUT_SIZE = 3;
const int8_t CONV_QUART_IN_INPUT_SIZE = 5;
// NOTE(review): presumably the user count required of a single-output eltwise node — confirm.
const int8_t ELTWISE_USE = 1;
// Number of users (manager->node_users()[node].size()) required by CheckMultiOutputEltWiseNode.
const int8_t ELTWISE_MULTI_USE = 2;
// NOTE(review): no use of MAX_ELTWISE_SIZE is visible in this hunk — meaning to be confirmed.
const int8_t MAX_ELTWISE_SIZE = 6;
// Exact number of nodes a multi-output eltwise fusion record must contain to be accepted.
const int8_t MULTI_ELTWISE_SIZE = 4;
using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>;
struct BufferFusionInfo_t {
@ -58,6 +62,7 @@ class FusionBasePass : public Pass {
void SetRecordFusionId(const std::unordered_set<AnfNodePtr> &record);
bool CheckEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node);
bool CheckDoubleInEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node);
bool CheckMultiOutputEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node);
FusionIdAllocatorPtr fusion_id_allocator;
};
} // namespace opt

@ -0,0 +1,81 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "pre_activate/ascend/buffer_fusion/multi_output_fusion_pass.h"
#include <vector>
#include <unordered_set>
#include <memory>
#include <string>
#include "kernel/kernel_fusion.h"
#include "debug/anf_ir_dump.h"
#include "session/anf_runtime_algorithm.h"
#include "operator/ops.h"
#include "utils/context/ms_context.h"
#include "pre_activate/common/fusion_id_allocator.h"
namespace mindspore {
namespace opt {
// Tries to build a multi-output element-wise fusion group that ends at `cnode`.
//
// Starting from input(1) of `cnode`, the walk requires one node accepted by
// CheckMultiOutputEltWiseNode and then follows input(1) through nodes accepted by
// CheckEltWiseNode. The group is recorded in `candidate_fusion` and given a fusion id
// only when it holds exactly MULTI_ELTWISE_SIZE nodes; otherwise nothing is recorded.
void MultiOutputFusionPass::MatchMultiOutputEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph,
                                                    FusedNodeRecord *candidate_fusion) {
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(candidate_fusion);
  auto manager = kernel_graph.manager();
  MS_EXCEPTION_IF_NULL(manager);

  std::unordered_set<AnfNodePtr> fused_nodes{cnode};
  auto current = cnode->input(1);

  // The direct producer has to be the multi-output eltwise node, otherwise there is no match.
  if (!CheckMultiOutputEltWiseNode(manager.get(), current)) {
    return;
  }
  (void)fused_nodes.insert(current);
  auto multi_output_cnode = current->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(multi_output_cnode);
  current = multi_output_cnode->input(1);

  // Keep collecting eltwise producers; stop as soon as the group is full so that the
  // last collected node is not inspected any further.
  while (CheckEltWiseNode(manager.get(), current)) {
    (void)fused_nodes.insert(current);
    if (fused_nodes.size() == MULTI_ELTWISE_SIZE) {
      break;
    }
    auto producer_cnode = current->cast<CNodePtr>();
    MS_EXCEPTION_IF_NULL(producer_cnode);
    current = producer_cnode->input(1);
  }

  if (fused_nodes.size() == MULTI_ELTWISE_SIZE) {
    candidate_fusion->push_back(fused_nodes);
    SetRecordFusionId(fused_nodes);
  }
}
// Scans the kernel graph for TBE element-wise kernels with ELTWISE_INPUT_SIZE inputs and
// hands each one to MatchMultiOutputEltwise, which appends any matched group to
// `candidate_fusion`.
//
// Nodes that are not real CNode kernels, already carry a fusion id, or are the Return
// primitive are skipped.
void MultiOutputFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph,
                                                     FusedNodeRecord *candidate_fusion) {
  MS_EXCEPTION_IF_NULL(candidate_fusion);
  const std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return());
  // Walk the TopoSort result back to front. Reverse iterators give that order without an
  // in-place std::reverse, so this file does not depend on <algorithm> being pulled in
  // transitively.
  for (auto iter = node_list.crbegin(); iter != node_list.crend(); ++iter) {
    const AnfNodePtr &node = *iter;
    if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) ||
        AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) {
      continue;
    }
    auto cnode = node->cast<CNodePtr>();
    MS_EXCEPTION_IF_NULL(cnode);
    if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL &&
        AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) {
      MatchMultiOutputEltwise(cnode, kernel_graph, candidate_fusion);
    }
  }
}
} // namespace opt
} // namespace mindspore

@ -0,0 +1,48 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MULTI_OUTPUT_FUSION_PASS_H_
#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MULTI_OUTPUT_FUSION_PASS_H_
#include <unordered_set>
#include <vector>
#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h"
#include "ir/anf.h"
#include "pre_activate/common/pass.h"
#include "pre_activate/common/fusion_id_allocator.h"
#include "device/kernel_info.h"
#include "kernel/kernel.h"
#include "session/kernel_graph.h"
namespace mindspore {
namespace opt {
using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>;
class MultiOutputFusionPass : public FusionBasePass {
public:
explicit MultiOutputFusionPass(FusionIdAllocatorPtr idAllocator)
: FusionBasePass("MultiOutputFusionPass", idAllocator) {}
~MultiOutputFusionPass() override = default;
void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override;
private:
void MatchMultiOutputEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph,
FusedNodeRecord *candidate_fusion);
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MULTI_OUTPUT_FUSION_PASS_H_

@ -0,0 +1,90 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "pre_activate/ascend/buffer_fusion/stridedread_conv_stridedwrite_fusion_pass.h"
#include <vector>
#include <unordered_set>
#include <memory>
#include <string>
#include "kernel/kernel_fusion.h"
#include "debug/anf_ir_dump.h"
#include "session/anf_runtime_algorithm.h"
#include "operator/ops.h"
#include "utils/context/ms_context.h"
#include "pre_activate/common/fusion_id_allocator.h"
namespace mindspore {
namespace opt {
// Tries to build a StridedRead -> convolution -> [eltwise] -> StridedWrite fusion group
// that ends at `cnode` (the caller passes a StridedWrite node).
//
// The walk follows input(1) upwards: an optional node accepted by CheckEltWiseNode, then a
// TBE convolution kernel with CONV_DOUBLE_IN_INPUT_SIZE..CONV_QUART_IN_INPUT_SIZE inputs,
// then a node named kStridedReadOpName. The group is appended to `candidate_fusion` and
// assigned a fusion id only when the whole chain matches.
void StridedReadConvStridedWriteFusionPass::MatchStridedReadConvStridedWrite(const CNodePtr &cnode,
                                                                             const session::KernelGraph &kernel_graph,
                                                                             FusedNodeRecord *candidate_fusion) {
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(candidate_fusion);
  auto manager = kernel_graph.manager();
  MS_EXCEPTION_IF_NULL(manager);

  std::unordered_set<AnfNodePtr> fused_nodes{cnode};
  auto producer = cnode->input(1);

  // An eltwise kernel between the convolution and the write is optional.
  if (CheckEltWiseNode(manager.get(), producer)) {
    (void)fused_nodes.insert(producer);
    auto eltwise_cnode = producer->cast<CNodePtr>();
    MS_EXCEPTION_IF_NULL(eltwise_cnode);
    producer = eltwise_cnode->input(1);
  }

  // The next producer has to be a real, not-yet-fused kernel node.
  const bool producer_usable = producer->isa<CNode>() && AnfAlgo::IsRealCNodeKernel(producer) &&
                               !fusion_id_allocator->HasFusionIdAttr(producer);
  if (!producer_usable) {
    return;
  }
  auto conv_cnode = producer->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(conv_cnode);

  // ... and specifically a TBE convolution with an accepted number of inputs.
  if (AnfAlgo::GetKernelType(conv_cnode) != KernelType::TBE_KERNEL ||
      AnfAlgo::GetFusionType(conv_cnode) != kernel::FusionType::CONVLUTION) {
    return;
  }
  const auto conv_input_num = conv_cnode->inputs().size();
  if (conv_input_num < CONV_DOUBLE_IN_INPUT_SIZE || conv_input_num > CONV_QUART_IN_INPUT_SIZE) {
    return;
  }
  (void)fused_nodes.insert(producer);

  // The convolution itself must be fed by a not-yet-fused StridedRead kernel.
  auto conv_input = conv_cnode->input(1);
  const bool conv_input_usable = conv_input->isa<CNode>() && AnfAlgo::IsRealCNodeKernel(conv_input) &&
                                 !fusion_id_allocator->HasFusionIdAttr(conv_input);
  if (!conv_input_usable) {
    return;
  }
  if (AnfAlgo::GetCNodeName(conv_input) != kStridedReadOpName) {
    return;
  }
  (void)fused_nodes.insert(conv_input);
  candidate_fusion->push_back(fused_nodes);
  SetRecordFusionId(fused_nodes);
}
// Visits the nodes returned by TopoSort for `kernel_graph` and, for every eligible node
// named kStridedWriteOpName, tries to match the StridedRead/Conv/StridedWrite chain that
// ends there. Matched groups are appended to `candidate_fusion`.
void StridedReadConvStridedWriteFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph,
                                                                     FusedNodeRecord *candidate_fusion) {
  MS_EXCEPTION_IF_NULL(candidate_fusion);
  std::vector<AnfNodePtr> sorted_nodes = TopoSort(kernel_graph.get_return());
  for (auto &candidate : sorted_nodes) {
    // Only real kernels that are not fused yet (and are not the Return node) are of interest.
    const bool eligible = AnfAlgo::IsRealCNodeKernel(candidate) &&
                          !fusion_id_allocator->HasFusionIdAttr(candidate) &&
                          !AnfAlgo::CheckPrimitiveType(candidate, prim::kPrimReturn);
    if (!eligible) {
      continue;
    }
    auto kernel_node = candidate->cast<CNodePtr>();
    MS_EXCEPTION_IF_NULL(kernel_node);
    if (AnfAlgo::GetCNodeName(kernel_node) != kStridedWriteOpName) {
      continue;
    }
    MatchStridedReadConvStridedWrite(kernel_node, kernel_graph, candidate_fusion);
  }
}
} // namespace opt
} // namespace mindspore

@ -0,0 +1,48 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_STRIDEDREAD_CONV_STRIDEDWRITE_FUSION_PASS_H_
#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_STRIDEDREAD_CONV_STRIDEDWRITE_FUSION_PASS_H_
#include <unordered_set>
#include <vector>
#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h"
#include "ir/anf.h"
#include "pre_activate/common/pass.h"
#include "pre_activate/common/fusion_id_allocator.h"
#include "device/kernel_info.h"
#include "kernel/kernel.h"
#include "session/kernel_graph.h"
namespace mindspore {
namespace opt {
using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>;
class StridedReadConvStridedWriteFusionPass : public FusionBasePass {
public:
explicit StridedReadConvStridedWriteFusionPass(FusionIdAllocatorPtr idAllocator)
: FusionBasePass("StridedReadConvStridedWriteFusionPass", idAllocator) {}
~StridedReadConvStridedWriteFusionPass() override = default;
void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override;
private:
void MatchStridedReadConvStridedWrite(const CNodePtr &cnode, const session::KernelGraph &kernel_graph,
FusedNodeRecord *candidate_fusion);
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_STRIDEDREAD_CONV_STRIDEDWRITE_FUSION_PASS_H_

@ -153,6 +153,8 @@ constexpr auto kLarsV2UpdateOpName = "LarsV2Update";
constexpr auto kSquareSumAllOpName = "SquareSumAll";
constexpr auto kNMSWithMaskOpName = "NMSWithMask";
constexpr auto kSoftmaxGradExtOpName = "SoftmaxGradExt";
// Operator names compared against AnfAlgo::GetCNodeName() by
// StridedReadConvStridedWriteFusionPass to recognise the two ends of its pattern.
constexpr auto kStridedReadOpName = "StridedRead";
constexpr auto kStridedWriteOpName = "StridedWrite";
// attr key name
constexpr auto kAttrInputNames = "input_names";

Loading…
Cancel
Save