GraphKernel supports multi-output kernels

pull/7013/head
dayschan 4 years ago
parent 5dbbcacadd
commit 7599686a72

File diff suppressed because it is too large Load Diff

@ -196,8 +196,7 @@ class CompositeGraph:
shape, dtype, data_format, name=name, para_type=Tensor.PARA_OUTPUT)
cur_fusion = None
for op in desc['op_desc']:
inputs = [self.tensors[d[0]['tensor_name']]
for d in op['input_desc'] if 'value' not in d[0]]
inputs = [self.tensors[d['tensor_name']] for x in op['input_desc'] for d in x if 'value' not in d]
out_desc = op['output_desc']
name, shape, dtype, data_format = out_desc[0]['tensor_name'], out_desc[
0]['shape'], out_desc[0]['data_type'], out_desc[0]['format']
@ -263,7 +262,7 @@ class CompositeGraph:
self.tensors[y], True)
inplace_desc = copy.deepcopy(d)
inplace_desc['attr'] = {'name': 'fake_output', 'value': fake}
z_desc, out_desc = inplace_desc['input_desc'][2][0].inplace_desc['output_desc'][0]
z_desc, out_desc = inplace_desc['input_desc'][2][0], inplace_desc['output_desc'][0]
z_desc['shape'] = z.shape
z_desc['data_type'] = z.dtype
z_desc['tensor_name'] = z.name

@ -26,10 +26,12 @@ def split_with_json(json_str: str):
try:
graph_desc = json.loads(json_str)
comp = model.load_composite(graph_desc)
graph_split = model.split(comp.graph)
graph_split, graph_mode = model.split(comp.graph)
is_multi_graph = len(graph_split) > 1
graph_list = list(map(comp.dump, graph_split))
result = {"multi_graph": is_multi_graph, "graph_desc": graph_list}
result = {"multi_graph": is_multi_graph,
"graph_desc": graph_list,
"graph_mode": graph_mode}
return json.dumps(result)
except jd.JSONDecodeError:
logger.error(traceback.format_exc())

@ -1,53 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===========================================================================
"""test split"""
import model
def graph_1():
    """Build a test graph: a -> Abs -> Abs -> Abs, then TensorAdd of the
    first and third Abs results (creates a diamond-shaped dependency)."""
    builder = model.GraphBuilder()
    with builder.graph_scope("main"):
        inp = builder.tensor([1024, 16], "float32", name="a")
        abs1 = builder.emit("Abs", inp, 'b')
        abs2 = builder.emit("Abs", abs1, 'c')
        abs3 = builder.emit("Abs", abs2, 'd')
        builder.emit("TensorAdd", [abs1, abs3], "e")
    return builder.get()[0]
def graph_2():
    """Build a test graph ending in a reduction: Abs -> Abs ->
    ReduceSum over axis 1 -> Sqrt."""
    builder = model.GraphBuilder()
    with builder.graph_scope("main"):
        inp = builder.tensor([1024, 16], "float32", name="a")
        first = builder.emit("Abs", inp, 'b')
        second = builder.emit("Abs", first, 'c')
        reduced = builder.emit("ReduceSum", second, 'd', attrs={'reduce_axis': (1,)})
        builder.emit("Sqrt", reduced, 'e')
    return builder.get()[0]
def test_split_by_pattern():
    """Smoke test: split a composite graph and print each subgraph.

    NOTE: model.split() returns a (subgraphs, graph_modes) tuple (see the
    updated split_with_json in this commit); the old code assigned the whole
    tuple to `subgraphs` and would iterate over the 2-tuple instead of the
    list of subgraphs.
    """
    def _test(graph):
        print("***************** main graph ***************")
        print(graph)
        # Only the subgraph list is needed here; the per-graph modes
        # ("basic"/"composite") are ignored by this smoke test.
        subgraphs, _ = model.split(graph)
        for i, g in enumerate(subgraphs):
            print('------------- subgraph {} --------------'.format(i))
            print(g)
    _test(graph_2())


if __name__ == '__main__':
    test_split_by_pattern()

@ -485,7 +485,7 @@ bool AkgKernelJsonGenerator::CollectFusedJson(const std::vector<AnfNodePtr> &anf
(*kernel_json)[kJsonKeyPlatform] = "AKG";
(*kernel_json)[kJsonKeyProcess] = GetProcessorStr(anf_nodes[0]);
(*kernel_json)[kJsonKeyComposite] = true;
(*kernel_json)[kJsonKeyCompositeGraph] = fg->ToString();
(*kernel_json)[kJsonKeyCompositeGraph] = fg->ToString() + "." + fg->debug_info()->get_id();
if (!GetIOSize(*kernel_json, &input_size_list_, &output_size_list_)) {
MS_LOG(ERROR) << "Cal mem size failed.";

@ -37,22 +37,17 @@ namespace opt {
namespace {
bool IsBasicOp(const AnfNodePtr &node, bool is_before_kernel_select) {
#if ENABLE_D
std::vector<PrimitivePtr> fusable_basic_ops = {prim::kPrimTensorAdd, prim::kPrimMul, prim::kPrimSub,
std::vector<PrimitivePtr> fusible_basic_ops = {prim::kPrimTensorAdd, prim::kPrimMul, prim::kPrimSub,
prim::kPrimExpandDims};
if (!is_before_kernel_select) {
fusable_basic_ops.push_back(prim::kPrimCast);
fusible_basic_ops.push_back(prim::kPrimCast);
}
#elif ENABLE_GPU
std::vector<PrimitivePtr> fusable_basic_ops = {
prim::kPrimAbs, prim::kPrimRound, prim::kPrimNeg, prim::kPrimExp, prim::kPrimTensorAdd,
prim::kPrimRealDiv, prim::kPrimMul, prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
prim::kPrimPow, prim::kPrimSub, prim::kPrimRsqrt, prim::kPrimSqrt, prim::kPrimCast,
prim::kPrimAddN, prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData, prim::kPrimSelect,
prim::kPrimGreater, prim::kPrimAssign};
std::vector<PrimitivePtr> fusible_basic_ops = GetFusibleOpList();
#else
std::vector<PrimitivePtr> fusable_basic_ops;
std::vector<PrimitivePtr> fusible_basic_ops;
#endif
return std::any_of(fusable_basic_ops.begin(), fusable_basic_ops.end(),
return std::any_of(fusible_basic_ops.begin(), fusible_basic_ops.end(),
[&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); });
}

@ -49,12 +49,7 @@ bool IsBasicFuseOp(const AnfNodePtr &node, bool is_before_kernel_select) {
basic_ops.push_back(prim::kPrimCast);
}
#elif ENABLE_GPU
std::vector<PrimitivePtr> basic_ops = {
prim::kPrimAbs, prim::kPrimRound, prim::kPrimNeg, prim::kPrimExp, prim::kPrimTensorAdd,
prim::kPrimRealDiv, prim::kPrimMul, prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
prim::kPrimPow, prim::kPrimSub, prim::kPrimRsqrt, prim::kPrimSqrt, prim::kPrimCast,
prim::kPrimAddN, prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData, prim::kPrimSelect,
prim::kPrimGreater, prim::kPrimAssign};
std::vector<PrimitivePtr> basic_ops = GetFusibleOpList();
#else
std::vector<PrimitivePtr> basic_ops;
#endif

@ -26,8 +26,8 @@
#include "ir/func_graph_cloner.h"
#include "ir/func_graph.h"
#include "backend/optimizer/pass/const_input_to_attr_registry.h"
#ifdef ENABLE_D
#include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
#if ENABLE_GPU
#include "runtime/device/gpu/kernel_info_setter.h"
#endif
namespace mindspore {
@ -612,36 +612,6 @@ FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector<AnfNo
return new_fg;
}
// Decodes a multi-graph JSON description back into per-subgraph node lists.
// The json must carry kJsonKeyMultiGraph (non-null) and a non-empty
// kJsonKeyGraphDesc array; each entry is decoded via DecodeSplitNodes using
// `address_node_map` to map tensor names back to existing AnfNodes.
// Returns false (with an error log) on any malformed input or decode failure.
bool JsonDescToAnf(const std::string &json_desc, const std::map<std::string, AnfNodePtr> &address_node_map,
                   std::vector<AnfNodePtrList> *res_graphs) {
  MS_EXCEPTION_IF_NULL(res_graphs);
  auto kernel_json = nlohmann::json::parse(json_desc);
  if (kernel_json.find(kJsonKeyMultiGraph) == kernel_json.end() || kernel_json[kJsonKeyMultiGraph].is_null()) {
    // not multi graphs.
    MS_LOG(ERROR) << "Input json is not multi graph, " << json_desc;
    return false;
  }
  kernel::AkgKernelJsonDecoder akg_kernel_json_decoder;
  std::vector<nlohmann::json> graph_descs = kernel_json[kJsonKeyGraphDesc];
  if (graph_descs.empty()) {
    MS_LOG(ERROR) << "No sub graph found, " << json_desc;
    return false;
  }
  // Decode every sub-graph; fail the whole call if any one of them fails.
  for (size_t i = 0; i < graph_descs.size(); ++i) {
    const auto &graph_desc = graph_descs[i];
    AnfNodePtrList res_graph;
    if (!akg_kernel_json_decoder.DecodeSplitNodes(graph_desc, address_node_map, &res_graph)) {
      MS_LOG(ERROR) << "Failed decode sub graph, " << graph_desc;
      return false;
    }
    res_graphs->push_back(res_graph);
  }
  return true;
}
std::unordered_set<PrimitivePtr> GetExpandOps() {
std::unordered_set<PrimitivePtr> expand_ops = {
prim::kPrimSquare,
@ -664,5 +634,23 @@ std::string ExtractGraphKernelName(const AnfNodePtrList &cnodes, const string &p
}
return name.str();
}
std::vector<PrimitivePtr> GetFusibleOpList() {
std::vector<PrimitivePtr> fusible_basic_ops = {
prim::kPrimAbs, prim::kPrimRound, prim::kPrimNeg, prim::kPrimExp, prim::kPrimTensorAdd,
prim::kPrimRealDiv, prim::kPrimMul, prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
prim::kPrimPow, prim::kPrimSub, prim::kPrimRsqrt, prim::kPrimSqrt, prim::kPrimCast,
prim::kPrimAddN, prim::kPrimEqual, prim::kPrimReciprocal, prim::KPrimTransData, prim::kPrimSelect,
prim::kPrimGreater, prim::kPrimAssign, prim::kPrimReduceSum};
return fusible_basic_ops;
}
// Re-runs kernel info selection for `node` after the surrounding graph has
// been restructured (e.g. by splitting). Raises if `node` is not a CNode.
// Only the GPU build actually re-selects (device::gpu::SetKernelInfo);
// on other targets this is a no-op beyond the cast/null check.
void ResetKernelInfo(const AnfNodePtr &node, KernelType kernel_type) {
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
#if ENABLE_GPU
  device::gpu::SetKernelInfo(cnode, kernel_type);
#endif
}
} // namespace opt
} // namespace mindspore

@ -35,6 +35,7 @@ constexpr auto kGraphKernelSplitFunc = "split_with_json";
constexpr auto kGetGraphKernelOpExpander = "get_op_expander";
constexpr auto kJsonKeyMultiGraph = "multi_graph";
constexpr auto kJsonKeyGraphDesc = "graph_desc";
constexpr auto kJsonKeyGraphMode = "graph_mode";
void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
const AnfNodePtrList &outputs, kernel::Processor processor);
@ -50,10 +51,10 @@ bool AnfToJsonDesc(const AnfNodePtrList &nodes, const DumpOption &dump_option, n
std::map<std::string, AnfNodePtr> *address_node_map = nullptr);
bool AnfToJsonDesc(const std::vector<AnfNodePtrList> &graphs, const DumpOption &dump_option, nlohmann::json *op_desc);
FuncGraphPtr JsonDescToAnf(const std::string &json_desc, const std::vector<AnfNodePtr> &inputs);
bool JsonDescToAnf(const std::string &json_desc, const std::map<std::string, AnfNodePtr> &address_node_map,
std::vector<AnfNodePtrList> *res_graphs);
std::unordered_set<PrimitivePtr> GetExpandOps();
std::string ExtractGraphKernelName(const AnfNodePtrList &cnodes, const string &prefix = "", const string &postfix = "");
std::vector<PrimitivePtr> GetFusibleOpList();
void ResetKernelInfo(const AnfNodePtr &node, KernelType kernel_type = KernelType::UNKNOWN_KERNEL_TYPE);
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_GRAPH_KERNEL_HELPER_H_

@ -26,6 +26,7 @@
#include "pipeline/jit/parse/python_adapter.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/akg/akg_kernel_json_decoder.h"
#include "backend/optimizer/graph_kernel/graph_kernel_helper.h"
#include "debug/anf_ir_dump.h"
@ -203,7 +204,7 @@ class AreaGraph {
}
SortCNodes(main_cnodes);
cnode_group_id->swap(topo_order_); // The topo_order is not used anymore.
*cnode_group_id = std::move(topo_order_); // The topo_order is not used anymore.
return;
}
@ -291,7 +292,7 @@ class AreaGraph {
std::vector<CNodePtr> main_cnodes_sorted;
std::transform(topo_order_.begin(), topo_order_.end(), std::back_inserter(main_cnodes_sorted),
[main_cnodes](int index) { return main_cnodes->at(index); });
main_cnodes->swap(main_cnodes_sorted);
*main_cnodes = std::move(main_cnodes_sorted);
}
// Areas in this subgraph
@ -415,6 +416,9 @@ class Splitter {
cnode->set_input(i, iter->second);
}
}
if (AnfAlgo::IsRealKernel(node)) {
ResetKernelInfo(node);
}
}
}
return output;
@ -445,7 +449,7 @@ class Splitter {
tmp_subgraph_cnodes.push_back(new_subgraph_cnodes_[i]);
}
}
new_subgraph_cnodes_.swap(tmp_subgraph_cnodes);
new_subgraph_cnodes_ = std::move(tmp_subgraph_cnodes);
TraverseFuncGraph(main_func_graph_, [&replace_map](const AnfNodePtr &node) {
auto cnode = node->cast<CNodePtr>();
@ -580,15 +584,38 @@ class CostModelSplitSchemer : public Splitter::SplitSchemer {
return false;
}
// recover json to anf-ir.
split_plan_.clear();
if (!JsonDescToAnf(split_graphs_str, address_node_map, &split_plan_)) {
MS_LOG(ERROR) << "Failed to decode split graphs.";
if (!DecodeJson(split_graphs_str, address_node_map)) {
MS_LOG(ERROR) << "Failed to decode split graphs. input json:\n" << split_graphs_str;
return false;
}
return true;
}
// Decodes the cost model's split result json into split_plan_ (one
// AnfNodePtrList per sub-graph) and need_inline_ (1 for "basic" graphs,
// which are inlined rather than compiled as fused kernels).
// Returns false when graph_mode/graph_desc sizes mismatch or any
// sub-graph fails to decode.
// NOTE(review): the diff residue left the removed
// `need_inline_.assign(split_plan_.size(), 0);` line in place right before
// `need_inline_.clear()`; that assign was dead code and is dropped here.
virtual bool DecodeJson(const std::string &json_desc, const std::map<std::string, AnfNodePtr> &address_node_map) {
  auto kernel_json = nlohmann::json::parse(json_desc);
  kernel::AkgKernelJsonDecoder akg_kernel_json_decoder;
  std::vector<nlohmann::json> graph_descs = kernel_json[kJsonKeyGraphDesc];
  std::vector<std::string> graph_modes = kernel_json[kJsonKeyGraphMode];
  // Each sub-graph must come with exactly one mode ("basic"/"composite").
  if (graph_modes.size() != graph_descs.size()) {
    MS_LOG(ERROR) << "Size of graph_mode " << graph_modes.size() << " mismatch graph_desc " << graph_descs.size();
    return false;
  }
  // recover json to anfnode.
  split_plan_.clear();
  for (const auto &graph_desc : graph_descs) {
    AnfNodePtrList res_graph;
    if (!akg_kernel_json_decoder.DecodeSplitNodes(graph_desc, address_node_map, &res_graph)) {
      MS_LOG(ERROR) << "Failed decode sub graph, " << graph_desc;
      return false;
    }
    split_plan_.push_back(std::move(res_graph));
  }
  // ops to be inlined.
  need_inline_.clear();
  std::transform(graph_modes.begin(), graph_modes.end(), std::back_inserter(need_inline_),
                 [](const std::string &mode) { return mode == "basic" ? 1 : 0; });
  return true;
}

@ -13,5 +13,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# Point PYTHONPATH at the in-tree graph_kernel python package so the tests
# import the `model` module directly from the source tree.
# NOTE(review): the stripped diff left both the old assignment
# (`$(pwd)/..`) and the new one; the old line was immediately overwritten
# and is dropped here.
PYTHONPATH="$(pwd)/../../../../mindspore/_extends/graph_kernel:${PYTHONPATH}"
export PYTHONPATH

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save