diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc index d45df4109d..5b16e6d8a7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc @@ -289,14 +289,14 @@ bool CreateNodeDefBytes(const std::shared_ptr &anf_node, return true; } -uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset) { +uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset, UnknowShapeOpType type) { // deal1: unknown shape type auto *info = reinterpret_cast(ext_info_buf + ext_info_offset); info->infoType = FWK_ADPT_EXT_SHAPE_TYPE; info->infoLen = sizeof(int32_t); ext_info_offset += kExtInfoHeadSize; auto *shape_type = reinterpret_cast(ext_info_buf + ext_info_offset); - *shape_type = UnknowShapeOpType::DEPEND_COMPUTE; + *shape_type = type; ext_info_offset += info->infoLen; return ext_info_offset; } @@ -401,7 +401,11 @@ bool CreateExtInfo(const std::shared_ptr &anf_node, const std::shared_p ext_info.resize(ext_info_len, 0); char *ext_info_buf = ext_info.data(); - ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset); + UnknowShapeOpType shape_type = UnknowShapeOpType::DEPEND_IN_SHAPE; + if (AnfAlgo::GetCNodeName(anf_node) == "Unique") { + shape_type = UnknowShapeOpType::DEPEND_COMPUTE; + } + ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset, shape_type); ext_info_offset = SetExtInfoInputShapeType(ext_info_buf, ext_info_offset, anf_node, input_num); ext_info_offset = SetExtInfoOutputShapeType(ext_info_buf, ext_info_offset, anf_node, output_num); diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc index 89e4c8362d..6ca596a6a3 100644 --- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include "ir/anf.h" #include "ir/func_graph.h" #include "base/core_ops.h" @@ -30,6 +31,7 @@ #include "backend/kernel_compiler/kernel_build_info.h" #include "common/trans.h" #include "abstract/param_validator.h" +#include "abstract/primitive_infer_map.h" #include "pipeline/jit/static_analysis/static_analysis.h" #include "utils/trace_base.h" @@ -820,6 +822,8 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableWorkspaceAddr(const AnfNodePtr & void AnfRuntimeAlgorithm::SetOutputInferTypeAndShape(const std::vector &types, const std::vector> &shapes, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); + auto node_ptr = node->cast(); + MS_EXCEPTION_IF_NULL(node_ptr); if (types.size() != shapes.size()) { MS_LOG(EXCEPTION) << "Types size " << types.size() << "should be same with shapes size " << shapes.size() << " trace: " << trace::DumpSourceLines(node); @@ -829,16 +833,23 @@ void AnfRuntimeAlgorithm::SetOutputInferTypeAndShape(const std::vector & } else if (shapes.size() == 1) { // single output handle ShapeVector shape_int; + auto max_shape = GetOutputMaxShape(node_ptr, 0); + auto min_shape = GetOutputMinShape(node_ptr, 0); std::transform(shapes[0].begin(), shapes[0].end(), std::back_inserter(shape_int), SizeToLong); - auto abstract = std::make_shared(TypeIdToType(types[0]), shape_int); + auto abstract = std::make_shared( + TypeIdToType(types[0]), std::make_shared(shape_int, min_shape, max_shape)); node->set_abstract(abstract); } else { // multiple output handle std::vector abstract_list; for (size_t i = 0; i < types.size(); ++i) { ShapeVector shape_int; + auto max_shape = GetOutputMaxShape(node_ptr, i); + auto min_shape = GetOutputMinShape(node_ptr, i); std::transform(shapes[i].begin(), shapes[i].end(), std::back_inserter(shape_int), SizeToLong); - abstract_list.emplace_back(std::make_shared(TypeIdToType(types[i]), shape_int)); + auto abstract = std::make_shared( + TypeIdToType(types[i]), std::make_shared(shape_int, min_shape, max_shape)); + abstract_list.emplace_back(abstract); } auto abstract_tuple = std::make_shared(abstract_list); node->set_abstract(abstract_tuple); @@ -1409,7 +1420,7 @@ std::vector AnfRuntimeAlgorithm::GetOutputMinShape(const AnfNodePtr &an } } -bool AnfRuntimeAlgorithm::IsNodeDynamicShape(const AnfNodePtr &node) { +bool IsNodeOutputDynamicShape(const CNodePtr &node) { MS_EXCEPTION_IF_NULL(node); auto base_shape = node->Shape(); if (base_shape == nullptr) { @@ -1436,6 +1447,66 @@ bool AnfRuntimeAlgorithm::IsNodeDynamicShape(const AnfNodePtr &node) { return false; } +bool IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr) { + MS_EXCEPTION_IF_NULL(anf_node_ptr); + auto input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr); + for (size_t i = 0; i < input_num; ++i) { + auto input_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i); + auto input = input_with_index.first; + auto index = input_with_index.second; + MS_EXCEPTION_IF_NULL(input); + + auto base_shape = input->Shape(); + if (base_shape == nullptr) { + MS_LOG(INFO) << "Invalid shape ptr, node:" << input->fullname_with_scope(); + continue; + } + if (base_shape->isa()) { + if (IsShapeDynamic(base_shape->cast())) { + return true; + } + } else if (base_shape->isa()) { + auto tuple_shape = base_shape->cast(); + MS_EXCEPTION_IF_NULL(tuple_shape); + + if (index >= tuple_shape->size()) { + MS_LOG(INFO) << "Node:" << anf_node_ptr->fullname_with_scope() << "Invalid index:" << index + << " and tuple_shape size:" << tuple_shape->size(); + continue; + } + + auto b_shp = (*tuple_shape)[index]; + if (!b_shp->isa()) { + continue; + } + if (IsShapeDynamic(b_shp->cast())) { + return true; + } + } + } + return false; +} + +bool AnfRuntimeAlgorithm::IsNodeDynamicShape(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + if (!node->isa()) { + MS_LOG(WARNING) << "Node is not a cnode"; + return false; + } + auto cnode = node->cast(); + auto in_dynamic = IsNodeInputDynamicShape(cnode); + auto out_dynamic = IsNodeOutputDynamicShape(cnode); + if (in_dynamic && !AnfAlgo::HasNodeAttr(kAttrInputIsDynamicShape, cnode)) { + AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), cnode); + MS_LOG(INFO) << "Set Input Dynamic Shape Attr to Node:" << cnode->fullname_with_scope(); + } + if (out_dynamic && !AnfAlgo::HasNodeAttr(kAttrOutputIsDynamicShape, cnode)) { + AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(true), cnode); + MS_LOG(INFO) << "Set Output Dynamic Shape Attr to Node:" << cnode->fullname_with_scope(); + } + return in_dynamic || out_dynamic; +} + std::vector AnfRuntimeAlgorithm::GetInputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index) { auto device_shape = GetInputDeviceShape(anf_node, index); // Initialize GPUKernel with max shape to fit 'InitDynamicOutputKernelRef()' for memory reuse. @@ -1500,5 +1571,50 @@ void AnfRuntimeAlgorithm::GetAllFatherRealNode(const AnfNodePtr &anf_node, std:: GetAllFatherRealNode(cnode->input(kDependAttachNodeIndex), result, visited); } } + +void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + MS_LOG(INFO) << "InferShape start, node:" << node->DebugString(); + auto inputs = node->inputs(); + if (inputs.empty()) { + MS_LOG(EXCEPTION) << "Invalid inputs"; + } + AbstractBasePtrList args_spec_list; + auto primitive = GetValueNode(inputs[0]); + auto input_size = AnfAlgo::GetInputTensorNum(node); + for (size_t i = 0; i < input_size; ++i) { + auto input_with_index = AnfAlgo::GetPrevNodeOutput(node, i); + auto real_input = input_with_index.first; + MS_EXCEPTION_IF_NULL(real_input); + auto cnode_input = node->input(i + 1); + MS_EXCEPTION_IF_NULL(cnode_input); + if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) { + auto base_shape = real_input->Shape(); + if (!base_shape->isa()) { + MS_LOG(EXCEPTION) << "Node:" << node->DebugString() + << " input is a tuple_get_item but real input node shape is not a TupleShape"; + } + auto tuple_ptr = base_shape->cast(); + MS_EXCEPTION_IF_NULL(tuple_ptr); + auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast()); + auto real_shape = tuple_ptr->shape().at(tuple_get_item_index); + auto abstract_tensor = cnode_input->abstract()->cast(); + MS_EXCEPTION_IF_NULL(abstract_tensor); + args_spec_list.emplace_back(std::make_shared(abstract_tensor->element(), real_shape)); + } else if (cnode_input->isa() && AnfAlgo::GetCNodeName(cnode_input) == prim::kPrimReshape->name()) { + args_spec_list.emplace_back(cnode_input->abstract()); + } else { + args_spec_list.emplace_back(real_input->abstract()); + } + } + auto &prim_eval_implement_map = abstract::GetPrimitiveToEvalImplMap(); + auto ret = prim_eval_implement_map.find(primitive); + if (ret == prim_eval_implement_map.end()) { + MS_LOG(EXCEPTION) << "Get infer shape function failed, primitive name:" << primitive->name() + << " primitive type:" << primitive->type_name(); + } + auto eval_result = ret->second.impl_(nullptr, primitive, args_spec_list); + node->set_abstract(eval_result); +} } // namespace session } // namespace mindspore diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h index 56c039ba2b..4e24af27cc 100644 --- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h @@ -230,6 +230,7 @@ class AnfRuntimeAlgorithm { static std::vector GetOutputMaxShape(const AnfNodePtr &anf_node, size_t index); static std::vector GetOutputMinShape(const AnfNodePtr &anf_node, size_t index); static bool IsNodeDynamicShape(const AnfNodePtr &node); + static void InferShape(const CNodePtr &node); static std::vector GetInputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index); static std::vector GetOutputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index); // Find control_depend real input nodes. diff --git a/mindspore/ccsrc/backend/session/cpu_session.cc b/mindspore/ccsrc/backend/session/cpu_session.cc index bac5d1625b..32f2352dab 100644 --- a/mindspore/ccsrc/backend/session/cpu_session.cc +++ b/mindspore/ccsrc/backend/session/cpu_session.cc @@ -65,6 +65,8 @@ GraphId CPUSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtr auto graph_id = graph_sum_; auto graph = ConstructKernelGraph(lst, outputs); MS_EXCEPTION_IF_NULL(graph); + UpdateGraphDynamicShapeAttr(NOT_NULL(graph)); + graph->UpdateGraphDynamicAttr(); MS_LOG(INFO) << "Set kernel info"; SetKernelInfo(graph.get()); #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU)) @@ -87,7 +89,7 @@ void CPUSession::CreateOutputTensors(const GraphId &graph_id, const std::vector< std::map *tensor_to_node) { auto kernel_graph = GetGraph(graph_id); MS_EXCEPTION_IF_NULL(kernel_graph); - runtime_.CreateOutputTensors(kernel_graph.get(), input_tensors, outputs); + runtime_.CreateOutputTensors(kernel_graph.get(), input_tensors, outputs, tensor_to_node); } void CPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector &inputs, diff --git a/mindspore/ccsrc/backend/session/session_basic.cc b/mindspore/ccsrc/backend/session/session_basic.cc index ddecff5a08..bac09b1434 100644 --- a/mindspore/ccsrc/backend/session/session_basic.cc +++ b/mindspore/ccsrc/backend/session/session_basic.cc @@ -47,6 +47,41 @@ static std::shared_ptr> python_paras; void ClearPythonParasMap() { python_paras = nullptr; } namespace { const int kSummaryGetItem = 2; +bool IsUsedByRealKernel(const FuncGraphManagerPtr &manager, const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(manager); + MS_EXCEPTION_IF_NULL(node); + auto node_users = manager->node_users()[node]; + for (auto item : node_users) { + if (AnfAlgo::IsRealKernel(item.first)) { + return true; + } + } + return false; +} + +bool IsUsedByDynamicKernel(const FuncGraphManagerPtr &manager, const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(manager); + MS_EXCEPTION_IF_NULL(node); + auto node_users = manager->node_users()[node]; + for (auto item : node_users) { + if (item.first->isa() && AnfAlgo::IsNodeDynamicShape(item.first->cast())) { + return true; + } + } + return false; +} + +bool CheckIfNeedCreateOutputTensor(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + if (node->isa()) { + auto node_ptr = node->cast(); + MS_EXCEPTION_IF_NULL(node_ptr); + if (!node_ptr->is_used_by_real_kernel()) { + return true; + } + } + return false; +} ValuePtr GetParamDefaultValue(const AnfNodePtr &node) { if (node == nullptr) { @@ -114,6 +149,8 @@ BaseRef CreateNodeOutputTensor(const session::KernelWithIndex &node_output_pair, MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(tensor_to_node); + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); MS_LOG(INFO) << "Create tensor for output[" << node->DebugString() << "] index[" << node_output_pair.second << "]"; // if node is a value node, no need sync addr from device to host if (node->isa()) { @@ -121,7 +158,8 @@ BaseRef CreateNodeOutputTensor(const session::KernelWithIndex &node_output_pair, MS_EXCEPTION_IF_NULL(value_node); return value_node->value(); } - if (!AnfAlgo::OutputAddrExist(node, output_index)) { + if (!AnfAlgo::OutputAddrExist(node, output_index) || + (CheckIfNeedCreateOutputTensor(node) && ms_context->get_param(MS_CTX_EXECUTION_MODE) != kPynativeMode)) { if (node->isa()) { for (size_t input_idx = 0; input_idx < graph->inputs().size(); input_idx++) { if (input_idx >= input_tensors.size()) { @@ -875,9 +913,21 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con // Update Graph Dynamic Shape Attr UpdateGraphDynamicShapeAttr(NOT_NULL(graph)); - opt::BackendCommonOptimization(graph); graph->SetInputNodes(); + auto input_nodes = graph->input_nodes(); + for (auto input_node : input_nodes) { + if (input_node->isa()) { + auto node_ptr = input_node->cast(); + MS_EXCEPTION_IF_NULL(node_ptr); + if (!IsUsedByRealKernel(manager, input_node)) { + node_ptr->set_used_by_real_kernel(); + } + if (IsUsedByDynamicKernel(manager, input_node)) { + node_ptr->set_used_by_dynamic_kernel(); + } + } + } graph->SetOptimizerFlag(); return graph; } @@ -950,7 +1000,22 @@ std::shared_ptr SessionBasic::ConstructKernelGraph(const FuncGraphP MS_LOG_EXCEPTION << "construct func graph " << func_graph->ToString() << "fail!"; } } + AddParameterToGraphInputs(func_graph->parameters(), graph.get()); + FuncGraphManagerPtr manager = MakeManager({graph}); + auto input_nodes = graph->inputs(); + for (auto input_node : input_nodes) { + if (input_node->isa()) { + auto node_ptr = input_node->cast(); + MS_EXCEPTION_IF_NULL(node_ptr); + if (!IsUsedByRealKernel(manager, input_node)) { + node_ptr->set_used_by_real_kernel(); + } + if (IsUsedByDynamicKernel(manager, input_node)) { + node_ptr->set_used_by_dynamic_kernel(); + } + } + } graph->SetExecOrderByDefault(); if (ExistSummaryNode(graph.get())) { graph->set_summary_node_exist(true); @@ -1021,14 +1086,23 @@ void SessionBasic::LoadInputData(const std::shared_ptr &kernel_grap MS_EXCEPTION_IF_NULL(tensor); auto input_node = input_nodes[i]; MS_EXCEPTION_IF_NULL(input_node); + auto size = LongToSize(tensor->data().nbytes()); + if (input_node->isa() && input_node->cast()->is_used_by_dynamic_kernel()) { + auto tensor_shape = tensor->shape(); + std::vector shape_tmp; + (void)std::transform(tensor_shape.begin(), tensor_shape.end(), std::back_inserter(shape_tmp), IntToSize); + AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(input_node, 0)}, {shape_tmp}, + input_node.get()); + size = trans::ShapeSize(shape_tmp) * trans::TypeIdSize(tensor->data_type()); + } if (input_node->isa() && AnfAlgo::OutputAddrExist(input_node, 0) && TensorNeedSync(input_node, tensor)) { auto device_address = AnfAlgo::GetMutableOutputAddr(input_node, 0); MS_EXCEPTION_IF_NULL(device_address); - if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(input_node, 0), - LongToSize(tensor->data().nbytes()), tensor->data_type(), - tensor->data_c())) { + if (size != 0 && !device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(input_node, 0), size, + tensor->data_type(), tensor->data_c())) { MS_LOG(EXCEPTION) << "SyncHostToDevice failed."; } + if (ms_context->get_param(MS_CTX_EXECUTION_MODE) == kPynativeMode || AnfAlgo::IsParameterWeight(input_node->cast())) { tensor->set_device_address(device_address); @@ -1543,55 +1617,6 @@ void SessionBasic::RunGraphAsync(const GraphId &graph_id, const std::vectorRunGraphAsync(shared_from_this(), graph_id, inputs, outputs); } -bool IsDynamicShape(const NotNull &shape) { - return std::any_of(shape->shape().begin(), shape->shape().end(), [](int64_t s) { return s < 0; }); -} - -bool IsNodeOutputDynamicShape(const CNodePtr &anf_node_ptr) { - MS_EXCEPTION_IF_NULL(anf_node_ptr); - return AnfAlgo::IsNodeDynamicShape(anf_node_ptr); -} - -bool IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr) { - MS_EXCEPTION_IF_NULL(anf_node_ptr); - auto input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr); - for (size_t i = 0; i < input_num; ++i) { - auto input_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i); - auto input = input_with_index.first; - auto index = input_with_index.second; - MS_EXCEPTION_IF_NULL(input); - - auto base_shape = input->Shape(); - if (base_shape == nullptr) { - MS_LOG(INFO) << "Invalid shape ptr, node:" << input->fullname_with_scope(); - continue; - } - if (base_shape->isa()) { - if (IsDynamicShape(NOT_NULL(base_shape->cast()))) { - return true; - } - } else if (base_shape->isa()) { - auto tuple_shape = base_shape->cast(); - MS_EXCEPTION_IF_NULL(tuple_shape); - - if (index >= tuple_shape->size()) { - MS_LOG(INFO) << "Node:" << anf_node_ptr->fullname_with_scope() << "Invalid index:" << index - << " and tuple_shape size:" << tuple_shape->size(); - continue; - } - - auto b_shp = (*tuple_shape)[index]; - if (!b_shp->isa()) { - continue; - } - if (IsDynamicShape(NOT_NULL(b_shp->cast()))) { - return true; - } - } - } - return false; -} - void SessionBasic::UpdateAllGraphDynamicShapeAttr(const std::vector &all_graphs) { bool is_dynamic = false; for (const auto &graph : all_graphs) { @@ -1605,20 +1630,10 @@ void SessionBasic::UpdateAllGraphDynamicShapeAttr(const std::vector &root_graph) { for (const auto &cnode : root_graph->execution_order()) { - auto output_dynamic = IsNodeOutputDynamicShape(NOT_NULL(cnode)); - auto input_dynamic = IsNodeInputDynamicShape(NOT_NULL(cnode)); - if (output_dynamic || input_dynamic) { + if (AnfAlgo::IsNodeDynamicShape(cnode)) { AnfAlgo::SetNodeAttr(kAttrIsDynamicShape, MakeValue(true), cnode); MS_LOG(INFO) << "Set Dynamic Shape Attr to Node:" << cnode->fullname_with_scope(); } - if (output_dynamic) { - AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(true), cnode); - MS_LOG(INFO) << "Set Output Dynamic Shape Attr to Node:" << cnode->fullname_with_scope(); - } - if (input_dynamic) { - AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), cnode); - MS_LOG(INFO) << "Set Input Dynamic Shape Attr to Node:" << cnode->fullname_with_scope(); - } } root_graph->UpdateGraphDynamicAttr(); } diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc index 6e32793b81..8a08c796c9 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc @@ -532,7 +532,7 @@ bool AscendDeviceAddress::SyncHostToDevice(const ShapeVector &shape, size_t size } if (format_ == kOpFormat_NCHW || format_ == kOpFormat_DEFAULT || format_ == kOpFormat_NDHWC) { if (type_id_ == type) { - SyncMemory(ptr_, host_ptr, size_, RT_MEMCPY_HOST_TO_DEVICE); + SyncMemory(ptr_, host_ptr, size, RT_MEMCPY_HOST_TO_DEVICE); sync_ok = true; } else if (type_id_ == kNumberTypeFloat32 && type == kNumberTypeFloat64) { sync_ok = Float64ToFloatAndSyncHostToDevice(ptr_, size_, host_ptr, size); diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc index 2f58516ec7..59b82717c5 100644 --- a/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc +++ b/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc @@ -66,11 +66,15 @@ void AiCpuDynamicKernel::Initialize() { input_num_ = AnfAlgo::GetInputTensorNum(cnode_ptr_); output_num_ = AnfAlgo::GetOutputTensorNum(cnode_ptr_); + UnknowShapeOpType shape_type = UnknowShapeOpType::DEPEND_IN_SHAPE; + if (AnfAlgo::GetCNodeName(cnode_ptr_) == "Unique") { + shape_type = UnknowShapeOpType::DEPEND_COMPUTE; + } // Parse aicpu ext info if (is_dynamic_shape_) { MS_EXCEPTION_IF_NULL(cnode_ptr_); ext_info_handler_ = - std::make_shared(cnode_ptr_->fullname_with_scope(), input_num_, output_num_, DEPEND_COMPUTE); + std::make_shared(cnode_ptr_->fullname_with_scope(), input_num_, output_num_, shape_type); ext_info_handler_->Parse(ext_info_data_); } diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc index f841a6e3c4..41d37dd135 100644 --- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "backend/kernel_compiler/kernel.h" #include "runtime/device/cpu/cpu_device_address.h" @@ -129,9 +130,11 @@ DeviceAddressPtr CPUKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t return std::make_shared(device_ptr, device_size, format, type_id); } -tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_graph, const CNodePtr &node, - size_t index) { +tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput( + session::KernelGraph *kernel_graph, const CNodePtr &node, size_t index, + std::map *tensor_to_node) { MS_EXCEPTION_IF_NULL(node); + MS_EXCEPTION_IF_NULL(tensor_to_node); size_t output_size = AnfAlgo::GetOutputTensorNum(node); if (index >= output_size) { MS_LOG(EXCEPTION) << "Invalid input index " << index; @@ -166,13 +169,16 @@ tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *k } (void)bound_addresses_.insert(address); } + session::KernelWithIndex node_index(node, index); tensor->SetNeedWait(true); tensor->SetIsGraphOutput(); + (*tensor_to_node)[tensor] = node_index; return tensor; } BaseRef CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_graph, - const session::KernelWithIndex &kernel_with_index) { + const session::KernelWithIndex &kernel_with_index, + std::map *tensor_to_node) { auto &input_node = kernel_with_index.first; auto index = kernel_with_index.second; MS_EXCEPTION_IF_NULL(input_node); @@ -183,12 +189,12 @@ BaseRef CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_grap VectorRef ret; for (size_t i = 1; i < node->inputs().size(); i++) { auto item_with_index = AnfAlgo::VisitKernelWithReturnType(node->input(i), 0); - auto out = CreatTensorForOutput(kernel_graph, item_with_index); + auto out = CreatTensorForOutput(kernel_graph, item_with_index, tensor_to_node); ret.push_back(out); } return ret; } - return CreatTensorForOutput(kernel_graph, node, index); + return CreatTensorForOutput(kernel_graph, node, index, tensor_to_node); } else if (input_node->isa()) { auto iter = input_param_tensor_map_.find(input_node); if (iter != input_param_tensor_map_.end()) { @@ -203,9 +209,11 @@ BaseRef CPUKernelRuntime::CreatTensorForOutput(session::KernelGraph *kernel_grap } void CPUKernelRuntime::CreateOutputTensors(session::KernelGraph *kernel_graph, - const std::vector &inputs, VectorRef *outputs) { + const std::vector &inputs, VectorRef *outputs, + std::map *tensor_to_node) { MS_EXCEPTION_IF_NULL(kernel_graph); MS_EXCEPTION_IF_NULL(outputs); + MS_EXCEPTION_IF_NULL(tensor_to_node); auto &input_nodes = kernel_graph->inputs(); if (input_nodes.size() != inputs.size()) { MS_LOG(EXCEPTION) << "Input size not equal to input node size!"; @@ -222,7 +230,7 @@ void CPUKernelRuntime::CreateOutputTensors(session::KernelGraph *kernel_graph, auto output_nodes = kernel_graph->outputs(); for (const auto &item : output_nodes) { auto item_with_index = AnfAlgo::VisitKernelWithReturnType(item, 0, true); - auto out = CreatTensorForOutput(kernel_graph, item_with_index); + auto out = CreatTensorForOutput(kernel_graph, item_with_index, tensor_to_node); outputs->push_back(std::move(out)); } } @@ -258,6 +266,12 @@ void CPUKernelRuntime::BindInputTensorAddressPtr(const session::KernelGraph &ker MS_LOG(EXCEPTION) << "Parameter node sync host to device failed!"; } } + if (item->cast()->is_used_by_dynamic_kernel()) { + auto tensor_shape = tensor->shape(); + std::vector shape_tmp; + (void)std::transform(tensor_shape.begin(), tensor_shape.end(), std::back_inserter(shape_tmp), IntToSize); + AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(item, 0)}, {shape_tmp}, item.get()); + } address->ref_count_ = INIT_NODE_REF; tensor->set_device_address(address); } @@ -325,6 +339,9 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink #ifdef ENABLE_PROFILE double start_time = GetTime(); #endif + if (AnfAlgo::IsDynamicShape(kernel)) { + AnfAlgo::InferShape(kernel); + } std::vector kernel_inputs; std::vector kernel_workspaces; std::vector kernel_outputs; diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h index 928ca9430b..4574bb4fd9 100644 --- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h @@ -39,7 +39,7 @@ class CPUKernelRuntime : public KernelRuntime { bool Run(session::KernelGraph *graph, bool is_task_sink) override; void AssignKernelAddress(session::KernelGraph *kernel_graph); void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector &inputs, - VectorRef *outputs); + VectorRef *outputs, std::map *tensor_to_node); void BindInputOutput(session::KernelGraph *kernel_graph, const std::vector &inputs, VectorRef *outputs); void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs); @@ -53,8 +53,10 @@ class CPUKernelRuntime : public KernelRuntime { TypeId type_id) override; private: - tensor::TensorPtr CreatTensorForOutput(session::KernelGraph *kernel_graph, const CNodePtr &node, size_t index); - BaseRef CreatTensorForOutput(session::KernelGraph *kernel_graph, const session::KernelWithIndex &kernel_with_index); + tensor::TensorPtr CreatTensorForOutput(session::KernelGraph *kernel_graph, const CNodePtr &node, size_t index, + std::map *tensor_to_node); + BaseRef CreatTensorForOutput(session::KernelGraph *kernel_graph, const session::KernelWithIndex &kernel_with_index, + std::map *tensor_to_node); void BindInputTensorAddressPtr(const session::KernelGraph &graph, const std::vector &inputs); void BindOutputTensorAddressPtr(const VectorRef *outputs); void AssignValueNodeAddress(session::KernelGraph *kernel_graph); diff --git a/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc index bc3165133c..ec009b5604 100644 --- a/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc +++ b/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc @@ -51,17 +51,6 @@ void DynamicKernel::Initialize() { int DynamicKernel::GetKernelType() { return AnfAlgo::GetKernelType(cnode_ptr_); } -bool IsTupleGetItem(const AnfNodePtr &anf_node) { - MS_EXCEPTION_IF_NULL(anf_node); - if (!anf_node->isa()) { - return false; - } - auto cnode = anf_node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto input0 = cnode->input(0); - return IsPrimitive(input0, prim::kPrimTupleGetItem); -} - void DynamicKernel::RebuildDependTensor() { depend_tensor_map_.clear(); for (auto depend : depend_list_) { @@ -112,7 +101,7 @@ void DynamicKernel::InferShape() { auto cnode_input = cnode_ptr_->input(i + 1); MS_EXCEPTION_IF_NULL(cnode_input); - if (IsTupleGetItem(cnode_input)) { + if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) { auto base_shape = real_input->Shape(); if (!base_shape->isa()) { MS_LOG(EXCEPTION) << "Node:" << cnode_ptr_->fullname_with_scope() diff --git a/mindspore/core/abstract/abstract_value.h b/mindspore/core/abstract/abstract_value.h index 126398b70a..109336e481 100644 --- a/mindspore/core/abstract/abstract_value.h +++ b/mindspore/core/abstract/abstract_value.h @@ -259,6 +259,13 @@ class AbstractUndetermined : public AbstractBase { } set_shape(std::make_shared(shape)); } + explicit AbstractUndetermined(const TypePtr &element_type, const BaseShapePtr &shape = std::make_shared()) + : AbstractBase(kAnyValue), element_(std::make_shared(kAnyValue, element_type)) { + if (element_type == nullptr) { + MS_LOG(EXCEPTION) << "element_type is nullptr"; + } + set_shape(shape); + } ~AbstractUndetermined() override = default; MS_DECLARE_PARENT(AbstractUndetermined, AbstractBase) TypePtr BuildType() const override { return std::make_shared(); } @@ -277,6 +284,8 @@ class AbstractTensor : public AbstractUndetermined { : AbstractUndetermined(element, shape) {} AbstractTensor(const TypePtr &element_type, const ShapeVector &shape) : AbstractUndetermined(element_type, shape) {} explicit AbstractTensor(const tensor::TensorPtr &tensor) : AbstractUndetermined(tensor->Dtype(), tensor->shape()) {} + explicit AbstractTensor(const TypePtr &element_type, const BaseShapePtr &shape = std::make_shared()) + : AbstractUndetermined(element_type, shape) {} ~AbstractTensor() override = default; MS_DECLARE_PARENT(AbstractTensor, AbstractUndetermined) diff --git a/mindspore/core/abstract/utils.cc b/mindspore/core/abstract/utils.cc index b250a0b2d3..35d65f394d 100644 --- a/mindspore/core/abstract/utils.cc +++ b/mindspore/core/abstract/utils.cc @@ -26,6 +26,12 @@ namespace mindspore { namespace abstract { +const std::map type_map = {{kNumberTypeBool, 1}, {kNumberTypeInt, 4}, {kNumberTypeInt8, 1}, + {kNumberTypeInt16, 2}, {kNumberTypeInt32, 4}, {kNumberTypeInt64, 8}, + {kNumberTypeUInt, 4}, {kNumberTypeUInt8, 1}, {kNumberTypeUInt16, 2}, + {kNumberTypeUInt32, 4}, {kNumberTypeUInt64, 8}, {kNumberTypeFloat, 4}, + {kNumberTypeFloat16, 2}, {kNumberTypeFloat32, 4}, {kNumberTypeFloat64, 8}}; + ValuePtr ValueJoin(const ValuePtr &value1, const ValuePtr &value2) { MS_EXCEPTION_IF_NULL(value1); MS_EXCEPTION_IF_NULL(value2); @@ -291,5 +297,18 @@ ShapePtr GetBroadcastShape(const std::string &op, const AbstractTensorPtr &tenso auto y_shape = tensor_y_shape->shape(); return std::make_shared(RealBroadcast(op, x_shape, y_shape)); } + +size_t TypeIdSize(const TypeId data_type) { + const size_t unsupported_type_error = 0; + auto iter = type_map.find(data_type); + if (iter != type_map.end()) { + return iter->second; + } + return unsupported_type_error; +} + +size_t ShapeSize(const std::vector &shape) { + return std::accumulate(shape.begin(), shape.end(), IntToSize(1), std::multiplies()); +} } // namespace abstract } // namespace mindspore diff --git a/mindspore/core/abstract/utils.h b/mindspore/core/abstract/utils.h index 3287e15184..76e108bc2b 100644 --- a/mindspore/core/abstract/utils.h +++ b/mindspore/core/abstract/utils.h @@ -51,6 +51,9 @@ int64_t GetPositiveAxis(int64_t axis_value, size_t increment); ShapeVector BroadcastShape(ShapeVector shpx, ShapeVector shpy); +size_t TypeIdSize(const TypeId data_type); +size_t ShapeSize(const std::vector &shape); + // Get broadcasted shape for binary element-wise operation ShapePtr GetBroadcastShape(const std::string &op, const AbstractTensorPtr &tensor_x, const AbstractTensorPtr &tensor_y); } // namespace abstract diff --git a/mindspore/core/ir/anf.h b/mindspore/core/ir/anf.h index e3dbd23dac..62c1c164a7 100644 --- a/mindspore/core/ir/anf.h +++ b/mindspore/core/ir/anf.h @@ -322,9 +322,17 @@ class Parameter : public ANode { return shared_from_this() == other.shared_from_this(); } + void set_used_by_real_kernel() { is_real_kernel_used_ = false; } + bool is_used_by_real_kernel() { return is_real_kernel_used_; } + + void set_used_by_dynamic_kernel() { is_used_by_dynamic_kernel_ = true; } + bool is_used_by_dynamic_kernel() { return is_used_by_dynamic_kernel_; } + private: std::string name_; bool has_default_; + bool is_real_kernel_used_ = true; + bool is_used_by_dynamic_kernel_ = false; ValuePtr default_param_; // The count of graphs using the parameter. int used_graph_count_; diff --git a/mindspore/core/ir/tensor.cc b/mindspore/core/ir/tensor.cc index a814ed26e2..0096832d3a 100644 --- a/mindspore/core/ir/tensor.cc +++ b/mindspore/core/ir/tensor.cc @@ -29,6 +29,7 @@ #include #include +#include "abstract/utils.h" #include "abstract/abstract_value.h" namespace mindspore { @@ -581,8 +582,11 @@ void Tensor::data_sync(bool need_wait) const { if (device_sync_ == nullptr) { return; } + std::vector shape_tmp; + (void)std::transform(shape().begin(), shape().end(), std::back_inserter(shape_tmp), IntToSize); + auto size = abstract::ShapeSize(shape_tmp) * abstract::TypeIdSize(data_type()); auto address = device_sync_; - if (!address->SyncDeviceToHost(shape(), static_cast(data().nbytes()), data_type(), data_c())) { + if (size != 0 && !address->SyncDeviceToHost(shape(), size, data_type(), data_c())) { MS_LOG(EXCEPTION) << "SyncDeviceToHost failed."; } sync_status_ = kNeedSyncHostToDevice; diff --git a/tests/st/dynamic_shape/test_ascend_cpu.py b/tests/st/dynamic_shape/test_ascend_cpu.py new file mode 100644 index 0000000000..e3c4938d60 --- /dev/null +++ b/tests/st/dynamic_shape/test_ascend_cpu.py @@ -0,0 +1,70 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import numpy as np +import pytest +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +import mindspore.common.dtype as mstype +from mindspore.ops import operations as P + +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.unique = P.Unique().add_prim_attr("primitive_target", "CPU") + + def construct(self, x): + x, y = self.unique(x) + return (x, y) + + +class UniqueSquare(nn.Cell): + def __init__(self): + super(UniqueSquare, self).__init__() + self.unique = P.Unique().add_prim_attr("primitive_target", "CPU") + self.square = P.Square() + + def construct(self, x): + x, _ = self.unique(x) + return self.square(x) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_unique_ascend(): + x = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32) + unique = Net() + output = unique(x) + expect1 = np.array([1, 2, 3]) + expect2 = np.array([0, 0, 1, 1, 2, 2]) + assert (output[0].asnumpy() == expect1).all() + assert (output[1].asnumpy() == expect2).all() + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_unique_square(): + x = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32) + net = UniqueSquare() + output = net(x) + expect1 = np.array([1, 4, 9]) + assert (output.asnumpy() == expect1).all() diff --git a/tests/st/dynamic_shape/test_unique_cpu.py b/tests/st/dynamic_shape/test_unique_cpu.py new file mode 100644 index 0000000000..6e2b57ab09 --- /dev/null +++ b/tests/st/dynamic_shape/test_unique_cpu.py @@ -0,0 +1,69 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import numpy as np +import pytest +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +import mindspore.common.dtype as mstype +from mindspore.ops import operations as P + +context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.unique = P.Unique() + + def construct(self, x): + return self.unique(x) + + +class UniqueSquare(nn.Cell): + def __init__(self): + super(UniqueSquare, self).__init__() + self.unique = P.Unique() + self.square = P.Square() + + def construct(self, x): + x, _ = self.unique(x) + return self.square(x) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_unique_cpu(): + x = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32) + unique = Net() + output = unique(x) + expect1 = np.array([1, 2, 3]) + expect2 = np.array([0, 0, 1, 1, 2, 2]) + assert (output[0].asnumpy() == expect1).all() + assert (output[1].asnumpy() == expect2).all() + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_unique_square(): + x = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32) + net = UniqueSquare() + output = net(x) + expect1 = np.array([1, 4, 9]) + assert (output.asnumpy() == expect1).all()