Support dynamic shape on host CPU

pull/8846/head
liubuyu 4 years ago
parent 42cbdfcafc
commit 9f5ab8f76f

@@ -289,14 +289,14 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
return true;
}
uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset) {
uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset, UnknowShapeOpType type) {
// deal1: unknown shape type
auto *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
info->infoLen = sizeof(int32_t);
ext_info_offset += kExtInfoHeadSize;
auto *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
*shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
*shape_type = type;
ext_info_offset += info->infoLen;
return ext_info_offset;
}
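Each ext-info record written above is a type-length-value entry: a fixed header carrying infoType and infoLen, followed by infoLen bytes of payload, with ext_info_offset advanced past both. A minimal sketch of a reader for that layout, assuming kExtInfoHeadSize == sizeof(ExtInfo) and 32-bit header fields (both assumptions, not taken from the real aicpu headers):

    #include <cstdint>
    #include <cstring>

    struct ExtInfoSketch {
      int32_t infoType;  // e.g. FWK_ADPT_EXT_SHAPE_TYPE (width assumed)
      uint32_t infoLen;  // payload size in bytes (width assumed)
    };
    constexpr uint64_t kExtInfoHeadSizeSketch = sizeof(ExtInfoSketch);

    // Scan a packed ext-info buffer; return the payload offset of the first
    // record of the requested type, or -1 if none is present.
    int64_t FindExtInfoPayload(const char *buf, uint64_t len, int32_t type) {
      uint64_t offset = 0;
      while (offset + kExtInfoHeadSizeSketch <= len) {
        ExtInfoSketch head;
        std::memcpy(&head, buf + offset, kExtInfoHeadSizeSketch);
        offset += kExtInfoHeadSizeSketch;
        if (head.infoType == type) return static_cast<int64_t>(offset);
        offset += head.infoLen;
      }
      return -1;
    }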
@@ -401,7 +401,11 @@ bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_p
ext_info.resize(ext_info_len, 0);
char *ext_info_buf = ext_info.data();
ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset);
UnknowShapeOpType shape_type = UnknowShapeOpType::DEPEND_IN_SHAPE;
if (AnfAlgo::GetCNodeName(anf_node) == "Unique") {
shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
}
ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset, shape_type);
ext_info_offset = SetExtInfoInputShapeType(ext_info_buf, ext_info_offset, anf_node, input_num);
ext_info_offset = SetExtInfoOutputShapeType(ext_info_buf, ext_info_offset, anf_node, output_num);
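Two unknown-shape categories are distinguished here, and again in AiCpuDynamicKernel::Initialize later in this commit: DEPEND_IN_SHAPE for ops whose output shape can be derived from the input shapes at launch, and DEPEND_COMPUTE for ops such as Unique whose output shape is only known after the kernel has run. The selection rule as a self-contained sketch (enumerator values are assumed for illustration):

    #include <string>

    enum class UnknowShapeOpTypeSketch { DEPEND_IN_SHAPE, DEPEND_COMPUTE };

    // Unique's output length depends on the data, so its shape can only be
    // read back after execution; everything else here infers from inputs.
    UnknowShapeOpTypeSketch GetUnknownShapeType(const std::string &op_name) {
      return op_name == "Unique" ? UnknowShapeOpTypeSketch::DEPEND_COMPUTE
                                 : UnknowShapeOpTypeSketch::DEPEND_IN_SHAPE;
    }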

@@ -18,6 +18,7 @@
#include <algorithm>
#include <map>
#include <set>
#include <stack>
#include "ir/anf.h"
#include "ir/func_graph.h"
#include "base/core_ops.h"
@@ -30,6 +31,7 @@
#include "backend/kernel_compiler/kernel_build_info.h"
#include "common/trans.h"
#include "abstract/param_validator.h"
#include "abstract/primitive_infer_map.h"
#include "pipeline/jit/static_analysis/static_analysis.h"
#include "utils/trace_base.h"
@@ -820,6 +822,8 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableWorkspaceAddr(const AnfNodePtr &
void AnfRuntimeAlgorithm::SetOutputInferTypeAndShape(const std::vector<TypeId> &types,
const std::vector<std::vector<size_t>> &shapes, AnfNode *node) {
MS_EXCEPTION_IF_NULL(node);
auto node_ptr = node->cast<AnfNodePtr>();
MS_EXCEPTION_IF_NULL(node_ptr);
if (types.size() != shapes.size()) {
MS_LOG(EXCEPTION) << "Types size " << types.size() << "should be same with shapes size " << shapes.size()
<< " trace: " << trace::DumpSourceLines(node);
@@ -829,16 +833,23 @@ void AnfRuntimeAlgorithm::SetOutputInferTypeAndShape(const std::vector<TypeId> &
} else if (shapes.size() == 1) {
// single output handle
ShapeVector shape_int;
auto max_shape = GetOutputMaxShape(node_ptr, 0);
auto min_shape = GetOutputMinShape(node_ptr, 0);
std::transform(shapes[0].begin(), shapes[0].end(), std::back_inserter(shape_int), SizeToLong);
auto abstract = std::make_shared<AbstractTensor>(TypeIdToType(types[0]), shape_int);
auto abstract = std::make_shared<AbstractTensor>(
TypeIdToType(types[0]), std::make_shared<abstract::Shape>(shape_int, min_shape, max_shape));
node->set_abstract(abstract);
} else {
// multiple output handle
std::vector<AbstractBasePtr> abstract_list;
for (size_t i = 0; i < types.size(); ++i) {
ShapeVector shape_int;
auto max_shape = GetOutputMaxShape(node_ptr, i);
auto min_shape = GetOutputMinShape(node_ptr, i);
std::transform(shapes[i].begin(), shapes[i].end(), std::back_inserter(shape_int), SizeToLong);
abstract_list.emplace_back(std::make_shared<AbstractTensor>(TypeIdToType(types[i]), shape_int));
auto abstract = std::make_shared<AbstractTensor>(
TypeIdToType(types[i]), std::make_shared<abstract::Shape>(shape_int, min_shape, max_shape));
abstract_list.emplace_back(abstract);
}
auto abstract_tuple = std::make_shared<AbstractTuple>(abstract_list);
node->set_abstract(abstract_tuple);
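Both branches now attach an abstract::Shape that carries min/max bounds next to the (possibly dynamic) inferred shape, instead of a bare ShapeVector. In miniature, assuming the usual convention that -1 marks a dynamic dimension (the type below is a simplified stand-in, not MindSpore's):

    #include <cstdint>
    #include <vector>

    struct BoundedShapeSketch {
      std::vector<int64_t> shape;      // -1 marks a dynamic dimension
      std::vector<int64_t> min_shape;  // per-dimension lower bound
      std::vector<int64_t> max_shape;  // per-dimension upper bound
    };

    // Unique over a length-n input: the output length is data dependent,
    // so the inferred shape is {-1}, bounded here by [1, n].
    BoundedShapeSketch UniqueOutputShape(int64_t n) {
      return BoundedShapeSketch{{-1}, {1}, {n}};
    }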
@@ -1409,7 +1420,7 @@ std::vector<int64_t> AnfRuntimeAlgorithm::GetOutputMinShape(const AnfNodePtr &an
}
}
bool AnfRuntimeAlgorithm::IsNodeDynamicShape(const AnfNodePtr &node) {
bool IsNodeOutputDynamicShape(const CNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
auto base_shape = node->Shape();
if (base_shape == nullptr) {
@@ -1436,6 +1447,66 @@ bool AnfRuntimeAlgorithm::IsNodeDynamicShape(const AnfNodePtr &node) {
return false;
}
bool IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr) {
MS_EXCEPTION_IF_NULL(anf_node_ptr);
auto input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr);
for (size_t i = 0; i < input_num; ++i) {
auto input_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i);
auto input = input_with_index.first;
auto index = input_with_index.second;
MS_EXCEPTION_IF_NULL(input);
auto base_shape = input->Shape();
if (base_shape == nullptr) {
MS_LOG(INFO) << "Invalid shape ptr, node:" << input->fullname_with_scope();
continue;
}
if (base_shape->isa<abstract::Shape>()) {
if (IsShapeDynamic(base_shape->cast<abstract::ShapePtr>())) {
return true;
}
} else if (base_shape->isa<abstract::TupleShape>()) {
auto tuple_shape = base_shape->cast<abstract::TupleShapePtr>();
MS_EXCEPTION_IF_NULL(tuple_shape);
if (index >= tuple_shape->size()) {
MS_LOG(INFO) << "Node:" << anf_node_ptr->fullname_with_scope() << "Invalid index:" << index
<< " and tuple_shape size:" << tuple_shape->size();
continue;
}
auto b_shp = (*tuple_shape)[index];
if (!b_shp->isa<abstract::Shape>()) {
continue;
}
if (IsShapeDynamic(b_shp->cast<abstract::ShapePtr>())) {
return true;
}
}
}
return false;
}
bool AnfRuntimeAlgorithm::IsNodeDynamicShape(const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
MS_LOG(WARNING) << "Node is not a cnode";
return false;
}
auto cnode = node->cast<CNodePtr>();
auto in_dynamic = IsNodeInputDynamicShape(cnode);
auto out_dynamic = IsNodeOutputDynamicShape(cnode);
if (in_dynamic && !AnfAlgo::HasNodeAttr(kAttrInputIsDynamicShape, cnode)) {
AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), cnode);
MS_LOG(INFO) << "Set Input Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
}
if (out_dynamic && !AnfAlgo::HasNodeAttr(kAttrOutputIsDynamicShape, cnode)) {
AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(true), cnode);
MS_LOG(INFO) << "Set Output Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
}
return in_dynamic || out_dynamic;
}
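IsNodeDynamicShape now doubles as the single marking point: it caches what it finds as per-node attributes, which UpdateGraphDynamicShapeAttr (further down in this commit) folds into a combined flag. The attribute contract, reduced to plain bools as a stand-in:

    // Stand-in for the three node attributes involved; kAttrIsDynamicShape
    // is set exactly when either side is dynamic.
    struct DynShapeFlagsSketch {
      bool input_is_dynamic = false;   // kAttrInputIsDynamicShape
      bool output_is_dynamic = false;  // kAttrOutputIsDynamicShape
      bool is_dynamic() const { return input_is_dynamic || output_is_dynamic; }
    };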
std::vector<size_t> AnfRuntimeAlgorithm::GetInputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index) {
auto device_shape = GetInputDeviceShape(anf_node, index);
// Initialize GPUKernel with max shape to fit 'InitDynamicOutputKernelRef()' for memory reuse.
@@ -1500,5 +1571,50 @@ void AnfRuntimeAlgorithm::GetAllFatherRealNode(const AnfNodePtr &anf_node, std::
GetAllFatherRealNode(cnode->input(kDependAttachNodeIndex), result, visited);
}
}
void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
MS_LOG(INFO) << "InferShape start, node:" << node->DebugString();
auto inputs = node->inputs();
if (inputs.empty()) {
MS_LOG(EXCEPTION) << "Invalid inputs";
}
AbstractBasePtrList args_spec_list;
auto primitive = GetValueNode<PrimitivePtr>(inputs[0]);
auto input_size = AnfAlgo::GetInputTensorNum(node);
for (size_t i = 0; i < input_size; ++i) {
auto input_with_index = AnfAlgo::GetPrevNodeOutput(node, i);
auto real_input = input_with_index.first;
MS_EXCEPTION_IF_NULL(real_input);
auto cnode_input = node->input(i + 1);
MS_EXCEPTION_IF_NULL(cnode_input);
if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) {
auto base_shape = real_input->Shape();
if (!base_shape->isa<abstract::TupleShape>()) {
MS_LOG(EXCEPTION) << "Node:" << node->DebugString()
<< " input is a tuple_get_item but real input node shape is not a TupleShape";
}
auto tuple_ptr = base_shape->cast<abstract::TupleShapePtr>();
MS_EXCEPTION_IF_NULL(tuple_ptr);
auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
auto real_shape = tuple_ptr->shape().at(tuple_get_item_index);
auto abstract_tensor = cnode_input->abstract()->cast<abstract::AbstractTensorPtr>();
MS_EXCEPTION_IF_NULL(abstract_tensor);
args_spec_list.emplace_back(std::make_shared<abstract::AbstractTensor>(abstract_tensor->element(), real_shape));
} else if (cnode_input->isa<CNode>() && AnfAlgo::GetCNodeName(cnode_input) == prim::kPrimReshape->name()) {
args_spec_list.emplace_back(cnode_input->abstract());
} else {
args_spec_list.emplace_back(real_input->abstract());
}
}
auto &prim_eval_implement_map = abstract::GetPrimitiveToEvalImplMap();
auto ret = prim_eval_implement_map.find(primitive);
if (ret == prim_eval_implement_map.end()) {
MS_LOG(EXCEPTION) << "Get infer shape function failed, primitive name:" << primitive->name()
<< " primitive type:" << primitive->type_name();
}
auto eval_result = ret->second.impl_(nullptr, primitive, args_spec_list);
node->set_abstract(eval_result);
}
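AnfAlgo::InferShape re-runs the primitive's registered inference implementation at execution time, once the real input shapes are known, and installs the result as the node's new abstract; the CPU runtime (see CPUKernelRuntime::Run below) calls it before launching each dynamic-shape kernel. The lookup-and-dispatch pattern, as a self-contained sketch (the registry below is a simplified stand-in for GetPrimitiveToEvalImplMap):

    #include <cstdint>
    #include <functional>
    #include <map>
    #include <stdexcept>
    #include <string>
    #include <vector>

    using ShapeV = std::vector<int64_t>;
    using InferFn = std::function<ShapeV(const std::vector<ShapeV> &)>;

    std::map<std::string, InferFn> &InferRegistry() {
      static std::map<std::string, InferFn> registry;
      return registry;
    }

    // Look up the primitive's infer function and re-run it with the shapes
    // observed at execution time; mirrors the error path in the code above.
    ShapeV RunInfer(const std::string &prim, const std::vector<ShapeV> &inputs) {
      auto it = InferRegistry().find(prim);
      if (it == InferRegistry().end()) {
        throw std::runtime_error("Get infer shape function failed, primitive name:" + prim);
      }
      return it->second(inputs);
    }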
} // namespace session
} // namespace mindspore

@@ -230,6 +230,7 @@ class AnfRuntimeAlgorithm {
static std::vector<int64_t> GetOutputMaxShape(const AnfNodePtr &anf_node, size_t index);
static std::vector<int64_t> GetOutputMinShape(const AnfNodePtr &anf_node, size_t index);
static bool IsNodeDynamicShape(const AnfNodePtr &node);
static void InferShape(const CNodePtr &node);
static std::vector<size_t> GetInputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index);
static std::vector<size_t> GetOutputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index);
// Find control_depend real input nodes.

@@ -65,6 +65,8 @@ GraphId CPUSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtr
auto graph_id = graph_sum_;
auto graph = ConstructKernelGraph(lst, outputs);
MS_EXCEPTION_IF_NULL(graph);
UpdateGraphDynamicShapeAttr(NOT_NULL(graph));
graph->UpdateGraphDynamicAttr();
MS_LOG(INFO) << "Set kernel info";
SetKernelInfo(graph.get());
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
@@ -87,7 +89,7 @@ void CPUSession::CreateOutputTensors(const GraphId &graph_id, const std::vector<
std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) {
auto kernel_graph = GetGraph(graph_id);
MS_EXCEPTION_IF_NULL(kernel_graph);
runtime_.CreateOutputTensors(kernel_graph.get(), input_tensors, outputs);
runtime_.CreateOutputTensors(kernel_graph.get(), input_tensors, outputs, tensor_to_node);
}
void CPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs,

@@ -47,6 +47,41 @@ static std::shared_ptr<std::map<ValuePtr, ParameterPtr>> python_paras;
void ClearPythonParasMap() { python_paras = nullptr; }
namespace {
const int kSummaryGetItem = 2;
bool IsUsedByRealKernel(const FuncGraphManagerPtr &manager, const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(manager);
MS_EXCEPTION_IF_NULL(node);
auto node_users = manager->node_users()[node];
for (auto item : node_users) {
if (AnfAlgo::IsRealKernel(item.first)) {
return true;
}
}
return false;
}
bool IsUsedByDynamicKernel(const FuncGraphManagerPtr &manager, const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(manager);
MS_EXCEPTION_IF_NULL(node);
auto node_users = manager->node_users()[node];
for (auto item : node_users) {
if (item.first->isa<CNode>() && AnfAlgo::IsNodeDynamicShape(item.first->cast<CNodePtr>())) {
return true;
}
}
return false;
}
bool CheckIfNeedCreateOutputTensor(const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
if (node->isa<Parameter>()) {
auto node_ptr = node->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(node_ptr);
if (!node_ptr->is_used_by_real_kernel()) {
return true;
}
}
return false;
}
ValuePtr GetParamDefaultValue(const AnfNodePtr &node) {
if (node == nullptr) {
@@ -114,6 +149,8 @@ BaseRef CreateNodeOutputTensor(const session::KernelWithIndex &node_output_pair,
MS_EXCEPTION_IF_NULL(node);
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(tensor_to_node);
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
MS_LOG(INFO) << "Create tensor for output[" << node->DebugString() << "] index[" << node_output_pair.second << "]";
// if node is a value node, no need sync addr from device to host
if (node->isa<ValueNode>()) {
@@ -121,7 +158,8 @@ BaseRef CreateNodeOutputTensor(const session::KernelWithIndex &node_output_pair,
MS_EXCEPTION_IF_NULL(value_node);
return value_node->value();
}
if (!AnfAlgo::OutputAddrExist(node, output_index)) {
if (!AnfAlgo::OutputAddrExist(node, output_index) ||
(CheckIfNeedCreateOutputTensor(node) && ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode)) {
if (node->isa<Parameter>()) {
for (size_t input_idx = 0; input_idx < graph->inputs().size(); input_idx++) {
if (input_idx >= input_tensors.size()) {
@@ -875,9 +913,21 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con
// Update Graph Dynamic Shape Attr
UpdateGraphDynamicShapeAttr(NOT_NULL(graph));
opt::BackendCommonOptimization(graph);
graph->SetInputNodes();
auto input_nodes = graph->input_nodes();
for (auto input_node : input_nodes) {
if (input_node->isa<Parameter>()) {
auto node_ptr = input_node->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(node_ptr);
if (!IsUsedByRealKernel(manager, input_node)) {
node_ptr->set_used_by_real_kernel();
}
if (IsUsedByDynamicKernel(manager, input_node)) {
node_ptr->set_used_by_dynamic_kernel();
}
}
}
graph->SetOptimizerFlag();
return graph;
}
@@ -950,7 +1000,22 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructKernelGraph(const FuncGraphP
MS_LOG_EXCEPTION << "construct func graph " << func_graph->ToString() << "fail!";
}
}
AddParameterToGraphInputs(func_graph->parameters(), graph.get());
FuncGraphManagerPtr manager = MakeManager({graph});
auto input_nodes = graph->inputs();
for (auto input_node : input_nodes) {
if (input_node->isa<Parameter>()) {
auto node_ptr = input_node->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(node_ptr);
if (!IsUsedByRealKernel(manager, input_node)) {
node_ptr->set_used_by_real_kernel();
}
if (IsUsedByDynamicKernel(manager, input_node)) {
node_ptr->set_used_by_dynamic_kernel();
}
}
}
graph->SetExecOrderByDefault();
if (ExistSummaryNode(graph.get())) {
graph->set_summary_node_exist(true);
@@ -1021,14 +1086,23 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap
MS_EXCEPTION_IF_NULL(tensor);
auto input_node = input_nodes[i];
MS_EXCEPTION_IF_NULL(input_node);
auto size = LongToSize(tensor->data().nbytes());
if (input_node->isa<Parameter>() && input_node->cast<ParameterPtr>()->is_used_by_dynamic_kernel()) {
auto tensor_shape = tensor->shape();
std::vector<size_t> shape_tmp;
(void)std::transform(tensor_shape.begin(), tensor_shape.end(), std::back_inserter(shape_tmp), IntToSize);
AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(input_node, 0)}, {shape_tmp},
input_node.get());
size = trans::ShapeSize(shape_tmp) * trans::TypeIdSize(tensor->data_type());
}
if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0) && TensorNeedSync(input_node, tensor)) {
auto device_address = AnfAlgo::GetMutableOutputAddr(input_node, 0);
MS_EXCEPTION_IF_NULL(device_address);
if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(input_node, 0),
LongToSize(tensor->data().nbytes()), tensor->data_type(),
tensor->data_c())) {
if (size != 0 && !device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(input_node, 0), size,
tensor->data_type(), tensor->data_c())) {
MS_LOG(EXCEPTION) << "SyncHostToDevice failed.";
}
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode ||
AnfAlgo::IsParameterWeight(input_node->cast<ParameterPtr>())) {
tensor->set_device_address(device_address);
@@ -1543,55 +1617,6 @@ void SessionBasic::RunGraphAsync(const GraphId &graph_id, const std::vector<tens
executor_->RunGraphAsync(shared_from_this(), graph_id, inputs, outputs);
}
bool IsDynamicShape(const NotNull<abstract::ShapePtr> &shape) {
return std::any_of(shape->shape().begin(), shape->shape().end(), [](int64_t s) { return s < 0; });
}
bool IsNodeOutputDynamicShape(const CNodePtr &anf_node_ptr) {
MS_EXCEPTION_IF_NULL(anf_node_ptr);
return AnfAlgo::IsNodeDynamicShape(anf_node_ptr);
}
bool IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr) {
MS_EXCEPTION_IF_NULL(anf_node_ptr);
auto input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr);
for (size_t i = 0; i < input_num; ++i) {
auto input_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i);
auto input = input_with_index.first;
auto index = input_with_index.second;
MS_EXCEPTION_IF_NULL(input);
auto base_shape = input->Shape();
if (base_shape == nullptr) {
MS_LOG(INFO) << "Invalid shape ptr, node:" << input->fullname_with_scope();
continue;
}
if (base_shape->isa<abstract::Shape>()) {
if (IsDynamicShape(NOT_NULL(base_shape->cast<abstract::ShapePtr>()))) {
return true;
}
} else if (base_shape->isa<abstract::TupleShape>()) {
auto tuple_shape = base_shape->cast<abstract::TupleShapePtr>();
MS_EXCEPTION_IF_NULL(tuple_shape);
if (index >= tuple_shape->size()) {
MS_LOG(INFO) << "Node:" << anf_node_ptr->fullname_with_scope() << "Invalid index:" << index
<< " and tuple_shape size:" << tuple_shape->size();
continue;
}
auto b_shp = (*tuple_shape)[index];
if (!b_shp->isa<abstract::Shape>()) {
continue;
}
if (IsDynamicShape(NOT_NULL(b_shp->cast<abstract::ShapePtr>()))) {
return true;
}
}
}
return false;
}
void SessionBasic::UpdateAllGraphDynamicShapeAttr(const std::vector<KernelGraphPtr> &all_graphs) {
bool is_dynamic = false;
for (const auto &graph : all_graphs) {
@@ -1605,20 +1630,10 @@ void SessionBasic::UpdateAllGraphDynamicShapeAttr(const std::vector<KernelGraphP
void SessionBasic::UpdateGraphDynamicShapeAttr(const NotNull<KernelGraphPtr> &root_graph) {
for (const auto &cnode : root_graph->execution_order()) {
auto output_dynamic = IsNodeOutputDynamicShape(NOT_NULL(cnode));
auto input_dynamic = IsNodeInputDynamicShape(NOT_NULL(cnode));
if (output_dynamic || input_dynamic) {
if (AnfAlgo::IsNodeDynamicShape(cnode)) {
AnfAlgo::SetNodeAttr(kAttrIsDynamicShape, MakeValue(true), cnode);
MS_LOG(INFO) << "Set Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
}
if (output_dynamic) {
AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(true), cnode);
MS_LOG(INFO) << "Set Output Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
}
if (input_dynamic) {
AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), cnode);
MS_LOG(INFO) << "Set Input Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
}
}
root_graph->UpdateGraphDynamicAttr();
}

@@ -532,7 +532,7 @@ bool AscendDeviceAddress::SyncHostToDevice(const ShapeVector &shape, size_t size
}
if (format_ == kOpFormat_NCHW || format_ == kOpFormat_DEFAULT || format_ == kOpFormat_NDHWC) {
if (type_id_ == type) {
SyncMemory(ptr_, host_ptr, size_, RT_MEMCPY_HOST_TO_DEVICE);
SyncMemory(ptr_, host_ptr, size, RT_MEMCPY_HOST_TO_DEVICE);
sync_ok = true;
} else if (type_id_ == kNumberTypeFloat32 && type == kNumberTypeFloat64) {
sync_ok = Float64ToFloatAndSyncHostToDevice(ptr_, size_, host_ptr, size);

@@ -66,11 +66,15 @@ void AiCpuDynamicKernel::Initialize() {
input_num_ = AnfAlgo::GetInputTensorNum(cnode_ptr_);
output_num_ = AnfAlgo::GetOutputTensorNum(cnode_ptr_);
UnknowShapeOpType shape_type = UnknowShapeOpType::DEPEND_IN_SHAPE;
if (AnfAlgo::GetCNodeName(cnode_ptr_) == "Unique") {
shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
}
// Parse aicpu ext info
if (is_dynamic_shape_) {
MS_EXCEPTION_IF_NULL(cnode_ptr_);
ext_info_handler_ =
std::make_shared<AicpuExtInfoHandler>(cnode_ptr_->fullname_with_scope(), input_num_, output_num_, DEPEND_COMPUTE);
std::make_shared<AicpuExtInfoHandler>(cnode_ptr_->fullname_with_scope(), input_num_, output_num_, shape_type);
ext_info_handler_->Parse(ext_info_data_);
}

@@ -19,6 +19,7 @@
#include <memory>
#include <numeric>
#include <utility>
#include <algorithm>
#include <functional>
#include "backend/kernel_compiler/kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
@@ -129,9 +130,11 @@ DeviceAddressPtr CPUKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t
return std::make_shared<CPUDeviceAddress>(device_ptr, device_size, format, type_id);
}
tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_graph, const CNodePtr &node,
size_t index) {
tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(
session::KernelGraph *kernel_graph, const CNodePtr &node, size_t index,
std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) {
MS_EXCEPTION_IF_NULL(node);
MS_EXCEPTION_IF_NULL(tensor_to_node);
size_t output_size = AnfAlgo::GetOutputTensorNum(node);
if (index >= output_size) {
MS_LOG(EXCEPTION) << "Invalid input index " << index;
@@ -166,13 +169,16 @@ tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *k
}
(void)bound_addresses_.insert(address);
}
session::KernelWithIndex node_index(node, index);
tensor->SetNeedWait(true);
tensor->SetIsGraphOutput();
(*tensor_to_node)[tensor] = node_index;
return tensor;
}
BaseRef CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_graph,
const session::KernelWithIndex &kernel_with_index) {
const session::KernelWithIndex &kernel_with_index,
std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) {
auto &input_node = kernel_with_index.first;
auto index = kernel_with_index.second;
MS_EXCEPTION_IF_NULL(input_node);
@@ -183,12 +189,12 @@ BaseRef CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_grap
VectorRef ret;
for (size_t i = 1; i < node->inputs().size(); i++) {
auto item_with_index = AnfAlgo::VisitKernelWithReturnType(node->input(i), 0);
auto out = CreatTensorForOutput(kernel_graph, item_with_index);
auto out = CreatTensorForOutput(kernel_graph, item_with_index, tensor_to_node);
ret.push_back(out);
}
return ret;
}
return CreatTensorForOutput(kernel_graph, node, index);
return CreatTensorForOutput(kernel_graph, node, index, tensor_to_node);
} else if (input_node->isa<Parameter>()) {
auto iter = input_param_tensor_map_.find(input_node);
if (iter != input_param_tensor_map_.end()) {
@@ -203,9 +209,11 @@ BaseRef CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_grap
}
void CPUKernelRuntime::CreateOutputTensors(session::KernelGraph *kernel_graph,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs,
std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) {
MS_EXCEPTION_IF_NULL(kernel_graph);
MS_EXCEPTION_IF_NULL(outputs);
MS_EXCEPTION_IF_NULL(tensor_to_node);
auto &input_nodes = kernel_graph->inputs();
if (input_nodes.size() != inputs.size()) {
MS_LOG(EXCEPTION) << "Input size not equal to input node size!";
@@ -222,7 +230,7 @@ void CPUKernelRuntime::CreateOutputTensors(session::KernelGraph *kernel_graph,
auto output_nodes = kernel_graph->outputs();
for (const auto &item : output_nodes) {
auto item_with_index = AnfAlgo::VisitKernelWithReturnType(item, 0, true);
auto out = CreatTensorForOutput(kernel_graph, item_with_index);
auto out = CreatTensorForOutput(kernel_graph, item_with_index, tensor_to_node);
outputs->push_back(std::move(out));
}
}
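CreateOutputTensors now threads tensor_to_node through every CreatTensorForOutput call, so each graph-output tensor is recorded against its producing (node, index) and flagged NeedWait, letting the session update the tensor's shape and data once dynamic kernels have actually run. The bookkeeping in miniature (the types below are simplified stand-ins):

    #include <cstddef>
    #include <map>
    #include <memory>
    #include <utility>

    struct NodeSketch {};                             // stand-in for AnfNode
    struct TensorSketch { bool need_wait = false; };  // stand-in for tensor::Tensor
    using TensorPtrSketch = std::shared_ptr<TensorSketch>;
    using KernelWithIndexSketch = std::pair<std::shared_ptr<NodeSketch>, size_t>;

    // Record an output tensor against its producing node/output index so it
    // can be finalized once the graph has executed.
    void RecordOutput(std::map<TensorPtrSketch, KernelWithIndexSketch> *m,
                      const TensorPtrSketch &t, const KernelWithIndexSketch &kwi) {
      t->need_wait = true;  // mirrors tensor->SetNeedWait(true) above
      (*m)[t] = kwi;
    }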
@@ -258,6 +266,12 @@ void CPUKernelRuntime::BindInputTensorAddressPtr(const session::KernelGraph &ker
MS_LOG(EXCEPTION) << "Parameter node sync host to device failed!";
}
}
if (item->cast<ParameterPtr>()->is_used_by_dynamic_kernel()) {
auto tensor_shape = tensor->shape();
std::vector<size_t> shape_tmp;
(void)std::transform(tensor_shape.begin(), tensor_shape.end(), std::back_inserter(shape_tmp), IntToSize);
AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(item, 0)}, {shape_tmp}, item.get());
}
address->ref_count_ = INIT_NODE_REF;
tensor->set_device_address(address);
}
@@ -325,6 +339,9 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
#ifdef ENABLE_PROFILE
double start_time = GetTime();
#endif
if (AnfAlgo::IsDynamicShape(kernel)) {
AnfAlgo::InferShape(kernel);
}
std::vector<kernel::AddressPtr> kernel_inputs;
std::vector<kernel::AddressPtr> kernel_workspaces;
std::vector<kernel::AddressPtr> kernel_outputs;

@@ -39,7 +39,7 @@ class CPUKernelRuntime : public KernelRuntime {
bool Run(session::KernelGraph *graph, bool is_task_sink) override;
void AssignKernelAddress(session::KernelGraph *kernel_graph);
void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs,
VectorRef *outputs);
VectorRef *outputs, std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node);
void BindInputOutput(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs,
VectorRef *outputs);
void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
@@ -53,8 +53,10 @@ class CPUKernelRuntime : public KernelRuntime {
TypeId type_id) override;
private:
tensor::TensorPtr CreatTensorForOutput(session::KernelGraph *kernel_graph, const CNodePtr &node, size_t index);
BaseRef CreatTensorForOutput(session::KernelGraph *kernel_graph, const session::KernelWithIndex &kernel_with_index);
tensor::TensorPtr CreatTensorForOutput(session::KernelGraph *kernel_graph, const CNodePtr &node, size_t index,
std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node);
BaseRef CreatTensorForOutput(session::KernelGraph *kernel_graph, const session::KernelWithIndex &kernel_with_index,
std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node);
void BindInputTensorAddressPtr(const session::KernelGraph &graph, const std::vector<tensor::TensorPtr> &inputs);
void BindOutputTensorAddressPtr(const VectorRef *outputs);
void AssignValueNodeAddress(session::KernelGraph *kernel_graph);

@@ -51,17 +51,6 @@ void DynamicKernel::Initialize() {
int DynamicKernel::GetKernelType() { return AnfAlgo::GetKernelType(cnode_ptr_); }
bool IsTupleGetItem(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
if (!anf_node->isa<CNode>()) {
return false;
}
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
auto input0 = cnode->input(0);
return IsPrimitive(input0, prim::kPrimTupleGetItem);
}
void DynamicKernel::RebuildDependTensor() {
depend_tensor_map_.clear();
for (auto depend : depend_list_) {
@@ -112,7 +101,7 @@ void DynamicKernel::InferShape() {
auto cnode_input = cnode_ptr_->input(i + 1);
MS_EXCEPTION_IF_NULL(cnode_input);
if (IsTupleGetItem(cnode_input)) {
if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) {
auto base_shape = real_input->Shape();
if (!base_shape->isa<abstract::TupleShape>()) {
MS_LOG(EXCEPTION) << "Node:" << cnode_ptr_->fullname_with_scope()

@@ -259,6 +259,13 @@ class AbstractUndetermined : public AbstractBase {
}
set_shape(std::make_shared<Shape>(shape));
}
explicit AbstractUndetermined(const TypePtr &element_type, const BaseShapePtr &shape = std::make_shared<Shape>())
: AbstractBase(kAnyValue), element_(std::make_shared<AbstractScalar>(kAnyValue, element_type)) {
if (element_type == nullptr) {
MS_LOG(EXCEPTION) << "element_type is nullptr";
}
set_shape(shape);
}
~AbstractUndetermined() override = default;
MS_DECLARE_PARENT(AbstractUndetermined, AbstractBase)
TypePtr BuildType() const override { return std::make_shared<UndeterminedType>(); }
@@ -277,6 +284,8 @@ class AbstractTensor : public AbstractUndetermined {
: AbstractUndetermined(element, shape) {}
AbstractTensor(const TypePtr &element_type, const ShapeVector &shape) : AbstractUndetermined(element_type, shape) {}
explicit AbstractTensor(const tensor::TensorPtr &tensor) : AbstractUndetermined(tensor->Dtype(), tensor->shape()) {}
explicit AbstractTensor(const TypePtr &element_type, const BaseShapePtr &shape = std::make_shared<Shape>())
: AbstractUndetermined(element_type, shape) {}
~AbstractTensor() override = default;
MS_DECLARE_PARENT(AbstractTensor, AbstractUndetermined)

@@ -26,6 +26,12 @@
namespace mindspore {
namespace abstract {
const std::map<TypeId, size_t> type_map = {{kNumberTypeBool, 1}, {kNumberTypeInt, 4}, {kNumberTypeInt8, 1},
{kNumberTypeInt16, 2}, {kNumberTypeInt32, 4}, {kNumberTypeInt64, 8},
{kNumberTypeUInt, 4}, {kNumberTypeUInt8, 1}, {kNumberTypeUInt16, 2},
{kNumberTypeUInt32, 4}, {kNumberTypeUInt64, 8}, {kNumberTypeFloat, 4},
{kNumberTypeFloat16, 2}, {kNumberTypeFloat32, 4}, {kNumberTypeFloat64, 8}};
ValuePtr ValueJoin(const ValuePtr &value1, const ValuePtr &value2) {
MS_EXCEPTION_IF_NULL(value1);
MS_EXCEPTION_IF_NULL(value2);
@@ -291,5 +297,18 @@ ShapePtr GetBroadcastShape(const std::string &op, const AbstractTensorPtr &tenso
auto y_shape = tensor_y_shape->shape();
return std::make_shared<Shape>(RealBroadcast(op, x_shape, y_shape));
}
size_t TypeIdSize(const TypeId data_type) {
const size_t unsupported_type_error = 0;
auto iter = type_map.find(data_type);
if (iter != type_map.end()) {
return iter->second;
}
return unsupported_type_error;
}
size_t ShapeSize(const std::vector<size_t> &shape) {
return std::accumulate(shape.begin(), shape.end(), IntToSize(1), std::multiplies<size_t>());
}
} // namespace abstract
} // namespace mindspore
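Tensor::data_sync (changed later in this commit) sizes its device-to-host copy with exactly this pair of helpers, ShapeSize(shape) * TypeIdSize(dtype), rather than data().nbytes(); the apparent motivation is that after a shape-refreshing dynamic op the current inferred shape, not the originally allocated host buffer, is authoritative. The arithmetic as a one-function sketch:

    #include <cstddef>
    #include <functional>
    #include <numeric>
    #include <vector>

    // Element count (product of dims) times per-element width. An unsupported
    // dtype yields 0, since TypeIdSize reports 0 for unknown types, and the
    // callers in this commit skip the copy when size == 0.
    size_t TensorNumBytes(const std::vector<size_t> &shape, size_t type_size) {
      size_t elems = std::accumulate(shape.begin(), shape.end(),
                                     static_cast<size_t>(1), std::multiplies<size_t>());
      return elems * type_size;
    }
    // e.g. TensorNumBytes({3}, 4) == 12 for the int32 Unique output of length 3.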

@@ -51,6 +51,9 @@ int64_t GetPositiveAxis(int64_t axis_value, size_t increment);
ShapeVector BroadcastShape(ShapeVector shpx, ShapeVector shpy);
size_t TypeIdSize(const TypeId data_type);
size_t ShapeSize(const std::vector<size_t> &shape);
// Get broadcasted shape for binary element-wise operation
ShapePtr GetBroadcastShape(const std::string &op, const AbstractTensorPtr &tensor_x, const AbstractTensorPtr &tensor_y);
} // namespace abstract

@@ -322,9 +322,17 @@ class Parameter : public ANode {
return shared_from_this() == other.shared_from_this();
}
void set_used_by_real_kernel() { is_real_kernel_used_ = false; }
bool is_used_by_real_kernel() { return is_real_kernel_used_; }
void set_used_by_dynamic_kernel() { is_used_by_dynamic_kernel_ = true; }
bool is_used_by_dynamic_kernel() { return is_used_by_dynamic_kernel_; }
private:
std::string name_;
bool has_default_;
bool is_real_kernel_used_ = true;
bool is_used_by_dynamic_kernel_ = false;
ValuePtr default_param_;
// The count of graphs using the parameter.
int used_graph_count_;
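Note the inversion in the flag API above: set_used_by_real_kernel() records that the parameter is NOT consumed by any real kernel (is_real_kernel_used_ defaults to true and is only ever cleared), which is why the call sites in session_basic.cc guard it with !IsUsedByRealKernel(...). A hypothetical wrapper, not part of the commit, that spells out the intended call pattern:

    // MarkParameter is illustrative only; it exists to make the inverted
    // setter semantics explicit.
    template <typename ParameterLike>
    void MarkParameter(ParameterLike *p, bool used_by_real_kernel, bool used_by_dynamic_kernel) {
      if (!used_by_real_kernel) p->set_used_by_real_kernel();  // clears is_real_kernel_used_
      if (used_by_dynamic_kernel) p->set_used_by_dynamic_kernel();
    }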

@@ -29,6 +29,7 @@
#include <type_traits>
#include <typeinfo>
#include "abstract/utils.h"
#include "abstract/abstract_value.h"
namespace mindspore {
@@ -581,8 +582,11 @@ void Tensor::data_sync(bool need_wait) const {
if (device_sync_ == nullptr) {
return;
}
std::vector<size_t> shape_tmp;
(void)std::transform(shape().begin(), shape().end(), std::back_inserter(shape_tmp), IntToSize);
auto size = abstract::ShapeSize(shape_tmp) * abstract::TypeIdSize(data_type());
auto address = device_sync_;
if (!address->SyncDeviceToHost(shape(), static_cast<size_t>(data().nbytes()), data_type(), data_c())) {
if (size != 0 && !address->SyncDeviceToHost(shape(), size, data_type(), data_c())) {
MS_LOG(EXCEPTION) << "SyncDeviceToHost failed.";
}
sync_status_ = kNeedSyncHostToDevice;

@@ -0,0 +1,70 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.unique = P.Unique().add_prim_attr("primitive_target", "CPU")
def construct(self, x):
x, y = self.unique(x)
return (x, y)
class UniqueSquare(nn.Cell):
def __init__(self):
super(UniqueSquare, self).__init__()
self.unique = P.Unique().add_prim_attr("primitive_target", "CPU")
self.square = P.Square()
def construct(self, x):
x, _ = self.unique(x)
return self.square(x)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_unique_ascend():
x = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32)
unique = Net()
output = unique(x)
expect1 = np.array([1, 2, 3])
expect2 = np.array([0, 0, 1, 1, 2, 2])
assert (output[0].asnumpy() == expect1).all()
assert (output[1].asnumpy() == expect2).all()
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_unique_square():
x = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32)
net = UniqueSquare()
output = net(x)
expect1 = np.array([1, 4, 9])
assert (output.asnumpy() == expect1).all()

@@ -0,0 +1,69 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.unique = P.Unique()
def construct(self, x):
return self.unique(x)
class UniqueSquare(nn.Cell):
def __init__(self):
super(UniqueSquare, self).__init__()
self.unique = P.Unique()
self.square = P.Square()
def construct(self, x):
x, _ = self.unique(x)
return self.square(x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_unique_cpu():
x = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32)
unique = Net()
output = unique(x)
expect1 = np.array([1, 2, 3])
expect2 = np.array([0, 0, 1, 1, 2, 2])
assert (output[0].asnumpy() == expect1).all()
assert (output[1].asnumpy() == expect2).all()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_unique_square():
x = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32)
net = UniqueSquare()
output = net(x)
expect1 = np.array([1, 4, 9])
assert (output.asnumpy() == expect1).all()