!7158 [GraphKernel] Promote complex tensors to graph inputs and correct the getitem index for graph kernel fusion.

Merge pull request !7158 from TronZhang/promotion_const_for_gk
pull/7158/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit ff3438d9c2

@ -112,8 +112,9 @@ bool AkgKernelJsonGenerator::CreateInputDescJson(const AnfNodePtr &anf_node, con
input_desc_json[kJsonKeyName] = input_ptr->name();
input_desc_json[kJsonKeyTensorName] = "input_" + std::to_string(GetInputTensorIdxInc(anf_node, real_input_index));
auto input_shape = this->GetInputShape(anf_node, real_input_index);
if (anf_node->func_graph() != nullptr && anf_node->func_graph()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL) &&
GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) {
bool fold_const =
anf_node->func_graph() != nullptr && anf_node->func_graph()->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL);
if (fold_const && GetInputTensorValue(anf_node, real_input_index, &input_desc_json)) {
MS_LOG(DEBUG) << "Take input[" << real_input_index << "] of [" << anf_node->DebugString(2)
<< "] as const tensor, shape: [" << Vector2Str(input_shape)
<< "], value: " << input_desc_json[kJsonKeyValue];

@ -701,9 +701,10 @@ bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann:
auto type_id = tensor->data_type();
auto *data = tensor->data_c();
MS_EXCEPTION_IF_NULL(data);
if (tensor->DataDim() > 1 || tensor->DataSize() != 1) {
if (tensor->DataSize() > 1) {
// Not a scalar const tensor; its value will not be folded.
MS_LOG(WARNING) << "We take first value of tensor whose datasize != 1, [" << input_node->DebugString(2) << "]";
MS_LOG(WARNING) << "Not take value of tensor whose datasize greater than 1, [" << input_node->DebugString(2) << "]";
return false;
}
if (type_id == kFloat32->type_id()) {

@ -15,6 +15,7 @@
*/
#include "backend/optimizer/graph_kernel/graph_kernel_helper.h"
#include <map>
#include <tuple>
#include <unordered_set>
#include "pipeline/jit/parse/python_adapter.h"
#include "pipeline/jit/action.h"
@ -244,7 +245,7 @@ AnfNodePtrList EliminateMakeTuple(const FuncGraphPtr &fg, const FuncGraphManager
bool GenJson(const AnfNodePtrList &op_nodes, const AnfNodePtrList &inputs, const AnfNodePtrList &outputs,
const DumpOption &dump_option, nlohmann::json *op_desc,
std::map<std::string, AnfNodePtr> *address_node_map) {
std::map<std::string, AnfNodePtr> *address_node_map = nullptr) {
kernel::AkgKernelJsonGenerator akg_kernel_json_generator(dump_option);
if (!akg_kernel_json_generator.CollectFusedJson(op_nodes, inputs, outputs)) {
MS_LOG(ERROR) << "Collect json desc failed.";
@ -262,6 +263,90 @@ bool GenJson(const AnfNodePtrList &op_nodes, const AnfNodePtrList &inputs, const
MS_LOG(INFO) << "Collect fusion json: " << fused_name;
return true;
}
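// Promote multi-element ("complex") const tensors to graph parameters: replace their value
// nodes inside fg with parameters and append the original value nodes to *inputs_ptr, so the
// caller passes the tensors in as real kernel inputs.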
void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) {
MS_EXCEPTION_IF_NULL(inputs_ptr);
auto nodes = TopoSort(fg->get_return());
std::map<ValuePtr, AnfNodePtrList> vmap;
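// Collect every value-node use of each multi-element const tensor, keyed by tensor value.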
for (const auto &node : nodes) {
if (!node->isa<CNode>()) {
continue;
}
auto &inputs = node->cast<CNodePtr>()->inputs();
for (size_t i = 1; i < inputs.size(); ++i) {
auto tnode = inputs[i];
auto tensor = GetValueNode<tensor::TensorPtr>(tnode);
if (tensor && (tensor->DataSize() > 1)) {
vmap[GetValueNode(tnode)].push_back(tnode);
}
}
}
if (vmap.empty()) {
return;
}
auto mng = fg->manager();
if (mng == nullptr) {
mng = Manage(fg, false);
fg->set_manager(mng);
}
auto &inputs = *inputs_ptr;
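// One parameter per distinct tensor value; every use is redirected to it.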
for (auto iter : vmap) {
auto value_nodes = iter.second;
if (value_nodes.empty()) {
MS_LOG(EXCEPTION) << "Invalid value in map!";
}
auto vnode = value_nodes[0];
auto parameter = fg->add_parameter();
parameter->set_abstract(vnode->abstract());
parameter->set_kernel_info(vnode->kernel_info_ptr());
for (const auto &value_node : value_nodes) {
mng->Replace(value_node, parameter);
}
inputs.push_back(vnode);
}
}
// Transform the nodes (both basic and composite) into a new graph and collect its inputs and outputs.
// If src_outputs is provided, it receives the outputs of the original segment before inlining.
std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes,
AnfNodePtrList *src_outputs = nullptr) {
FuncGraphPtr fg;
AnfNodePtrList inputs;
AnfNodePtrList outputs;
AnfNodePtrList *soutputs = (src_outputs != nullptr) ? src_outputs : &outputs;
std::tie(fg, inputs, *soutputs) = compile::TransformSegmentToAnfGraph(fuse_nodes);
FuncGraphManagerPtr mng = fg->manager();
if (mng == nullptr) {
mng = Manage(fg, false);
fg->set_manager(mng);
}
// Inline the original graph-kernel composites into the new graph
auto cnodes = fg->GetOrderedCnodes();
for (const auto &n : cnodes) {
if (!AnfAlgo::IsGraphKernel(n)) {
continue;
}
auto graph_kernel_g = GetValueNode<FuncGraphPtr>(n->input(0));
AnfNodePtrList ins;
ins.insert(ins.end(), n->inputs().begin() + 1, n->inputs().end());
auto out = InlineClone(graph_kernel_g, fg, ins, n->input(0)->scope());
mng->Replace(n, out);
}
EliminateMakeTuple(fg, mng);
ConvertComplexTensorToParameter(fg, &inputs);
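// Inlining and make_tuple elimination may have changed the graph's outputs; recollect them.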
outputs.clear();
kernel::GetFuncGraphOutputNodes(fg, &outputs);
return std::make_tuple(fg, inputs, outputs);
}
} // namespace
void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
@ -400,6 +485,7 @@ void ReplaceNewFuseCNode(const FuncGraphPtr &func_graph, const AnfNodePtr &new_f
}
std::vector<AnfNodePtr> fn_inputs;
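// offset counts the extra real outputs contributed by earlier entries of `outputs`,
// so TupleGetItem indices on later outputs can be shifted to their new positions.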
size_t offset = 0;
for (size_t out_idx = 0; out_idx < outputs.size(); out_idx++) {
AnfNodePtrList real_outs;
// Not a make_tuple output; replace directly
@ -427,7 +513,7 @@ void ReplaceNewFuseCNode(const FuncGraphPtr &func_graph, const AnfNodePtr &new_f
auto value_node = value_input->cast<ValueNodePtr>();
MS_EXCEPTION_IF_NULL(value_node);
int item_idx = GetValue<int>(value_node->value());
int new_item_idx = SizeToInt(out_idx) + item_idx;
int new_item_idx = SizeToInt(out_idx) + offset + item_idx;
fn_inputs.clear();
fn_inputs.push_back(NewValueNode(prim::kPrimTupleGetItem));
fn_inputs.push_back(new_fuse_cnode);
@ -436,6 +522,8 @@ void ReplaceNewFuseCNode(const FuncGraphPtr &func_graph, const AnfNodePtr &new_f
new_out->set_abstract(get_item_cnode->abstract());
mng->Replace(get_item_cnode, new_out);
}
offset += real_outs.size() - 1;
}
}
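A hypothetical example of the re-indexing (values are illustrative, not from the patch): suppose the fused node's outputs are [A, B] and A expands into three real outputs. While processing A, offset grows by real_outs.size() - 1 = 2, so a consumer that used TupleGetItem(B, j) (out_idx = 1, item_idx = j) is rewritten to TupleGetItem(new_fuse_cnode, 1 + 2 + j). Without the offset it would read index 1 + j, which still points into A's expanded outputs.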
@ -454,31 +542,17 @@ void FuseNodesToSubGraph(const std::vector<AnfNodePtr> &fuse_nodes,
FuncGraphPtr fg;
AnfNodePtrList inputs;
AnfNodePtrList src_outputs;
AnfNodePtrList outputs;
std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes);
// Remove nest make tuple in outs
auto expand_out = GetExpandOuts(outputs);
auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, expand_out, is_before_kernel_select);
std::tie(fg, inputs, outputs) = MixedNodesTransToGraph(fuse_nodes, &src_outputs);
auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, outputs, is_before_kernel_select);
if (!is_before_kernel_select) {
SetNewKernelInfo(fuse_new_node, fg, inputs, expand_out, AnfAlgo::GetProcessor(fuse_nodes[0]));
SetNewKernelInfo(fuse_new_node, fg, inputs, outputs, AnfAlgo::GetProcessor(fuse_nodes[0]));
}
ReplaceNewFuseCNode(kernel_graph, fuse_new_node, outputs);
// Handle the get-item problem.
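// Note: the original segment outputs (src_outputs) are passed here, since consumers in the
// main graph still reference them rather than the post-inline graph outputs.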
ReplaceNewFuseCNode(kernel_graph, fuse_new_node, src_outputs);
// Inline origin graphkernel
auto cnodes = fg->GetOrderedCnodes();
for (const auto &n : cnodes) {
if (!AnfAlgo::IsGraphKernel(n)) {
continue;
}
auto graph_kernel_g = GetValueNode<FuncGraphPtr>(n->input(0));
AnfNodePtrList ins;
ins.insert(ins.end(), n->inputs().begin() + 1, n->inputs().end());
auto out = InlineClone(graph_kernel_g, fg, ins, n->input(0)->scope());
mng->Replace(n, out);
}
EliminateMakeTuple(fg, mng);
// set graphKernel attr
std::string fuse_op_name = "";
for (auto &fuse_node : fuse_nodes) {
@ -512,32 +586,45 @@ bool AnfToJsonDesc(const AnfNodePtrList &nodes, const DumpOption &dump_option, n
if (is_single_graph_kernel) {
fg = AnfAlgo::GetCNodeFuncGraphPtr(nodes[0]);
kernel::GetValidKernelNodes(fg, &op_nodes, &inputs, &outputs);
return GenJson(op_nodes, inputs, outputs, dump_option, op_desc, address_node_map);
} else if (!has_graph_kernel) {
std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(nodes);
op_nodes = nodes;
return GenJson(op_nodes, inputs, outputs, dump_option, op_desc, address_node_map);
} else {
// When there are basic and composite ops, the composite ops should be inlined into the basic ones' graph,
// so a new graph generation should be done (because they may be in the main graph!).
// If address_node_map is wanted, we should map the new nodes in the new graph to the old nodes. But... not supported now.
MS_LOG(EXCEPTION) << "No support mixed with basic and composite ops now!";
}
std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(nodes);
auto mng = Manage(fg, false);
fg->set_manager(mng);
// Inline origin graph kernel
auto fg_nodes = fg->GetOrderedCnodes();
for (auto const &n : fg_nodes) {
if (!AnfAlgo::IsGraphKernel(n)) {
continue;
}
auto graph_kernel_g = GetValueNode<FuncGraphPtr>(n->input(0));
AnfNodePtrList ins;
ins.insert(ins.end(), n->inputs().begin() + 1, n->inputs().end());
auto out = InlineClone(graph_kernel_g, fg, ins, n->input(0)->scope());
mng->Replace(n, out);
return GenJson(op_nodes, inputs, outputs, dump_option, op_desc, address_node_map);
}
bool AnfToJsonDesc(const AnfNodePtrList &nodes, const DumpOption &dump_option, nlohmann::json *op_desc) {
MS_EXCEPTION_IF_NULL(op_desc);
if (nodes.empty()) {
MS_LOG(ERROR) << "Input nodes is empty.";
return false;
}
inputs.clear();
outputs.clear();
FuncGraphPtr fg;
AnfNodePtrList op_nodes, inputs, outputs;
if (nodes.size() == 1 && AnfAlgo::IsGraphKernel(nodes[0])) {
fg = AnfAlgo::GetCNodeFuncGraphPtr(nodes[0]);
} else {
std::tie(fg, inputs, outputs) = MixedNodesTransToGraph(nodes);
inputs.clear();
outputs.clear();
}
kernel::GetValidKernelNodes(fg, &op_nodes, &inputs, &outputs);
return GenJson(op_nodes, inputs, outputs, dump_option, op_desc, address_node_map);
auto mng = fg->manager();
if (mng == nullptr) {
mng = Manage(fg, false);
fg->set_manager(mng);
}
return GenJson(op_nodes, inputs, outputs, dump_option, op_desc);
}
bool AnfToJsonDesc(const std::vector<AnfNodePtrList> &graphs, const DumpOption &dump_option, nlohmann::json *op_desc) {

@ -47,8 +47,9 @@ void ReplaceNewFuseCNode(const FuncGraphPtr &kernel_graph, const AnfNodePtr &new
void FuseNodesToSubGraph(const std::vector<AnfNodePtr> &fuse_nodes,
const std::shared_ptr<session::KernelGraph> &kernel_graph, const std::string &postfix,
bool is_before_kernel_select);
bool AnfToJsonDesc(const AnfNodePtrList &nodes, const DumpOption &dump_option, nlohmann::json *op_desc);
bool AnfToJsonDesc(const AnfNodePtrList &nodes, const DumpOption &dump_option, nlohmann::json *op_desc,
std::map<std::string, AnfNodePtr> *address_node_map = nullptr);
std::map<std::string, AnfNodePtr> *address_node_map);
bool AnfToJsonDesc(const std::vector<AnfNodePtrList> &graphs, const DumpOption &dump_option, nlohmann::json *op_desc);
FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector<AnfNodePtr> &inputs);
std::unordered_set<PrimitivePtr> GetExpandOps();

@ -0,0 +1,45 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/optimizer/graph_kernel/value_graph_binder.h"
#include <unordered_set>
#include "frontend/optimizer/irpass.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/optimizer/graph_kernel/graph_kernel_helper.h"
namespace mindspore {
namespace opt {
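// Walk the graph and register any tensor ValueNode that is missing from the kernel graph's
// value-node set (e.g. value nodes introduced by the graph-kernel fusion passes).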
bool BindValueToGraph::Run(const FuncGraphPtr &func_graph) {
MS_EXCEPTION_IF_NULL(func_graph);
auto todos = TopoSort(func_graph->get_return());
auto kernel_graph = std::dynamic_pointer_cast<session::KernelGraph>(func_graph);
MS_EXCEPTION_IF_NULL(kernel_graph);
auto &value_nodes = kernel_graph->graph_value_nodes();
bool changed = false;
for (auto node : todos) {
if (!GetValueNode<tensor::TensorPtr>(node)) {
continue;
}
if (auto vptr = node->cast<ValueNodePtr>(); value_nodes.count(vptr) == 0) {
kernel_graph->AddValueNodeToGraph(vptr);
changed = true;
}
}
return changed;
}
} // namespace opt
} // namespace mindspore

@ -0,0 +1,33 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_VALUE_GRAPH_BINDER_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_VALUE_GRAPH_BINDER_H_
#include <memory>
#include "ir/func_graph.h"
#include "backend/optimizer/common/pass.h"
namespace mindspore {
namespace opt {
class BindValueToGraph : public Pass {
public:
BindValueToGraph() : Pass("bind_value_to_graph") {}
~BindValueToGraph() override = default;
bool Run(const FuncGraphPtr &func_graph) override;
};
using BindValueToGraphPtr = std::shared_ptr<BindValueToGraph>;
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_VALUE_GRAPH_BINDER_H_

@ -38,6 +38,7 @@
#include "backend/optimizer/gpu/remove_format_transform_pair.h"
#include "backend/optimizer/gpu/remove_redundant_format_transform.h"
#include "backend/optimizer/gpu/cudnn_inplace_fusion.h"
#include "backend/optimizer/graph_kernel/value_graph_binder.h"
#include "backend/optimizer/graph_kernel/graph_kernel_splitter.h"
#include "backend/optimizer/graph_kernel/graph_kernel_expander.h"
#include "backend/optimizer/graph_kernel/basic_ops_fusion.h"
@ -116,6 +117,7 @@ void GPUSession::GraphKernelOptimize(const std::shared_ptr<KernelGraph> &kernel_
pm->AddPass(std::make_shared<opt::BasicOpsFusion>());
pm->AddPass(std::make_shared<opt::CompositeOpsFusion>());
pm->AddPass(std::make_shared<opt::GraphKernelSplitter>());
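// Register value nodes introduced by the fusion passes above with the kernel graph.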
pm->AddPass(std::make_shared<opt::BindValueToGraph>());
optimizer->AddPassManager(pm);
(void)optimizer->Optimize(kernel_graph);
kernel_graph->SetExecOrderByDefault();
