!468 Adding security checks and support optional inputs

From: @xchu42
Reviewed-by: @ji_chen,@wqtshg
Signed-off-by: @ji_chen
pull/468/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 66e309f700

@ -23,6 +23,8 @@
namespace ge { namespace ge {
namespace hybrid { namespace hybrid {
// Largest HBM allocation size, in bytes, accepted by NpuMemoryAllocator::Allocate():
// 1024 GiB (2^40). Requests above this value are rejected as PARAM_INVALID.
// Declared const because it is a fixed limit, not a tunable. The factors are multiplied
// as size_t so the product does not wrap on platforms where unsigned long is narrower
// than size_t (e.g. LLP64, where 1024UL * 1024UL * 1024UL * 1024UL evaluates to 0).
const size_t kMaxHbmMemorySize = static_cast<size_t>(1024) * 1024 * 1024 * 1024;  // 1024G
// Out-of-class definitions of NpuMemoryAllocator's static data members.
// allocators_ maps a uint32_t key to an owned NpuMemoryAllocator instance
// (presumably one allocator per device id -- TODO confirm against the lookup code).
std::map<uint32_t, std::unique_ptr<NpuMemoryAllocator>> NpuMemoryAllocator::allocators_; std::map<uint32_t, std::unique_ptr<NpuMemoryAllocator>> NpuMemoryAllocator::allocators_;
// NOTE(review): mu_ presumably guards allocators_; the locking site is not visible here.
std::mutex NpuMemoryAllocator::mu_; std::mutex NpuMemoryAllocator::mu_;
@ -62,6 +64,10 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) {
} else if (mem_type == HOST_DDR) { } else if (mem_type == HOST_DDR) {
buffer = malloc(allocate_size); buffer = malloc(allocate_size);
} else { } else {
if (allocate_size > kMaxHbmMemorySize) {
GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size);
return nullptr;
}
void *try_reuse_addr = nullptr; void *try_reuse_addr = nullptr;
int padding = kDefaultPadding; int padding = kDefaultPadding;
if (attr != nullptr) { if (attr != nullptr) {

@ -58,6 +58,7 @@ std::unique_ptr<TensorBuffer> TensorBuffer::Create(void *buffer, size_t size) {
// Destructor: returns buffer_ to allocator_ via Deallocate(buffer_, mem_type_).
// Nothing is released when either allocator_ or buffer_ is null.
// NOTE(review): in this listing each line shows the pre-change text followed by the
// post-change text; a line that appears once exists in only one of the two revisions.
TensorBuffer::~TensorBuffer() { TensorBuffer::~TensorBuffer() {
if (allocator_ != nullptr && buffer_ != nullptr) { if (allocator_ != nullptr && buffer_ != nullptr) {
allocator_->Deallocate(buffer_, mem_type_); allocator_->Deallocate(buffer_, mem_type_);
// Clear the pointer after it has been handed back so it is not left dangling.
buffer_ = nullptr;
} }
} }

@ -52,7 +52,7 @@ Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper>
} }
Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &listener) { Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &listener) {
GELOGD("HybridModelExecutor::Start IN, listener = %p", listener.get()); GELOGD("HybridModelExecutor::Start IN, has listener = %d", listener != nullptr);
std::lock_guard<std::mutex> lk(mu_); std::lock_guard<std::mutex> lk(mu_);
GE_CHK_BOOL_RET_STATUS(!run_flag_, INTERNAL_ERROR, "Model already started."); GE_CHK_BOOL_RET_STATUS(!run_flag_, INTERNAL_ERROR, "Model already started.");
@ -219,11 +219,11 @@ Status HybridModelAsyncExecutor::CopyInputData(const InputData &current_data) {
auto mem_size = static_cast<uint32_t>(data_size); auto mem_size = static_cast<uint32_t>(data_size);
GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length, GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length,
PARAM_INVALID, PARAM_INVALID,
"input data size(%u) does not match model required size(%u), ret failed.", "input data size(%lu) does not match model required size(%u), ret failed.",
data_buf.length, data_buf.length,
mem_size); mem_size);
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%u]", GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%lu]",
model_->root_runtime_param_.graph_id, input_index, input_tensor.GetData(), mem_size, data_buf.length); model_->root_runtime_param_.graph_id, input_index, input_tensor.GetData(), mem_size, data_buf.length);
GE_CHK_RT_RET(rtMemcpy(input_tensor.MutableData(), GE_CHK_RT_RET(rtMemcpy(input_tensor.MutableData(),
mem_size, mem_size,
@ -241,7 +241,7 @@ Status HybridModelAsyncExecutor::InitInputTensors() {
int input_index = 0; int input_index = 0;
for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) { for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) {
GELOGD("Init input[%u], node = %s", input_index, input_node->NodeName().c_str()); GELOGD("Init input[%u], node = %s", input_index, input_node->NodeName().c_str());
auto output_desc = input_node->op_desc->GetOutputDescPtr(kDataOutputIndex); auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex);
GE_CHECK_NOTNULL(output_desc); GE_CHECK_NOTNULL(output_desc);
int64_t tensor_size = 0; int64_t tensor_size = 0;
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size), GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size),

@ -35,39 +35,43 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(
this->num_pending_shapes_); this->num_pending_shapes_);
} }
// Stores a newly known shape for input `idx` of node_item and wakes waiters once no
// input shapes remain pending.
//   idx       - input index (uint32_t in the earlier revision, int in the later one).
//   ori_shape - value written with SetOriginShape().
//   shape     - value written with SetShape().
// The earlier revision returns void; the later one returns Status (SUCCESS on the paths
// visible here; GE_CHECK_NOTNULL presumably returns an error status when the tensor desc
// is null -- TODO confirm against the macro definition).
// NOTE(review): each line shows the pre-change text followed by the post-change text;
// lines that appear once exist in only one of the two revisions.
void ShapeInferenceState::UpdateInputShape(uint32_t idx, Status ShapeInferenceState::UpdateInputShape(int idx,
const GeShape &ori_shape, const GeShape &ori_shape,
const GeShape &shape) { const GeShape &shape) {
// Inputs whose shape is static are left untouched: only a debug message is emitted and
// num_pending_shapes_ is not decremented.
// NOTE(review): the log call below dereferences MutableInputDesc(idx) without a null
// check, unlike the update path further down -- confirm this cannot be null here.
if (!node_item.is_dynamic || node_item.is_input_shape_static[idx]) { if (node_item.IsInputShapeStatic(idx)) {
GELOGD("[%s] Trying to update static shape, idx = %u. old shape = [%s], new shape = [%s]", GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]",
node_item.NodeName().c_str(), node_item.NodeName().c_str(),
idx, idx,
node_item.op_desc->MutableInputDesc(idx)->GetShape().ToString().c_str(), node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(),
shape.ToString().c_str()); shape.ToString().c_str());
return; return SUCCESS;
} }
GELOGD("[%s] Update input shape [%u] with Shape: [%s] and OriginalShape: [%s]", GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]",
node_item.NodeName().c_str(), node_item.NodeName().c_str(),
idx, idx,
shape.ToString().c_str(), shape.ToString().c_str(),
ori_shape.ToString().c_str()); ori_shape.ToString().c_str());
// mu_ is held for the tensor-desc update, the pending counter decrement and the notify.
std::lock_guard<std::mutex> lk(mu_); std::lock_guard<std::mutex> lk(mu_);
node_item.op_desc->MutableInputDesc(idx)->SetShape(shape); auto tensor_desc = node_item.MutableInputDesc(idx);
node_item.op_desc->MutableInputDesc(idx)->SetOriginShape(ori_shape); GE_CHECK_NOTNULL(tensor_desc);
tensor_desc->SetShape(shape);
tensor_desc->SetOriginShape(ori_shape);
// Last pending input shape has arrived: wake every thread waiting on ready_cv_.
if (--num_pending_shapes_ == 0) { if (--num_pending_shapes_ == 0) {
ready_cv_.notify_all(); ready_cv_.notify_all();
} }
return SUCCESS;
} }
void ShapeInferenceState::UpdateInputShapeFuture(uint32_t idx, ShapeFuture &&future) { void ShapeInferenceState::UpdateInputShapeFuture(int idx, ShapeFuture &&future) {
if (!node_item.is_dynamic || node_item.is_input_shape_static[idx]) { if (node_item.IsInputShapeStatic(idx)) {
GELOGD("[%s] Trying to update constant shape, idx = %u", node_item.NodeName().c_str(), idx); GELOGD("[%s] Trying to update constant shape, idx = %d", node_item.NodeName().c_str(), idx);
return; return;
} }
GELOGD("[%s] Update input shape [%u] with ShapeFuture.", node_item.NodeName().c_str(), idx); GELOGD("[%s] Update input shape [%d] with ShapeFuture.", node_item.NodeName().c_str(), idx);
std::lock_guard<std::mutex> lk(mu_); std::lock_guard<std::mutex> lk(mu_);
shape_futures.emplace_back(idx, std::move(future)); shape_futures.emplace_back(idx, std::move(future));
if (--num_pending_shapes_ == 0) { if (--num_pending_shapes_ == 0) {
@ -120,8 +124,10 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
idx, idx,
shape.ToString().c_str(), shape.ToString().c_str(),
ori_shape.ToString().c_str()); ori_shape.ToString().c_str());
node_item.op_desc->MutableInputDesc(idx)->SetShape(std::move(shape)); auto input_desc = node_item.MutableInputDesc(idx);
node_item.op_desc->MutableInputDesc(idx)->SetOriginShape(ori_shape); GE_CHECK_NOTNULL(input_desc);
input_desc->SetShape(std::move(shape));
input_desc->SetOriginShape(ori_shape);
} }
return SUCCESS; return SUCCESS;
@ -140,7 +146,7 @@ NodeState::NodeState(const NodeItem &node_item, SubgraphContext *subgraph_contex
Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const { Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const {
for (auto &src_node : node_item_->dependents_for_execution) { for (auto &src_node : node_item_->dependents_for_execution) {
GELOGI("[%s] Start to wait for data dependent node: [%s]", GELOGD("[%s] Start to wait for data dependent node: [%s]",
node_item_->NodeName().c_str(), node_item_->NodeName().c_str(),
src_node->GetName().c_str()); src_node->GetName().c_str());
RECORD_EXECUTION_EVENT(&context, RECORD_EXECUTION_EVENT(&context,
@ -156,7 +162,7 @@ Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const {
node_item_->NodeName().c_str(), node_item_->NodeName().c_str(),
"[AwaitNodeDone] [%s] End", "[AwaitNodeDone] [%s] End",
src_node->GetName().c_str()); src_node->GetName().c_str());
GELOGI("[%s] Done waiting node.", src_node->GetName().c_str()); GELOGD("[%s] Done waiting node.", src_node->GetName().c_str());
} }
return SUCCESS; return SUCCESS;
@ -173,7 +179,7 @@ Status NodeState::WaitForPrepareDone() {
} }
Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
GELOGI("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
if (!subgraph_context_->Await(src_node_)) { if (!subgraph_context_->Await(src_node_)) {
GELOGE(INTERNAL_ERROR, "cancelled"); GELOGE(INTERNAL_ERROR, "cancelled");
return INTERNAL_ERROR; return INTERNAL_ERROR;
@ -181,7 +187,7 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
shape = src_node_->GetOpDesc()->MutableOutputDesc(src_index_)->MutableShape(); shape = src_node_->GetOpDesc()->MutableOutputDesc(src_index_)->MutableShape();
ori_shape = src_node_->GetOpDesc()->MutableOutputDesc(src_index_)->GetOriginShape(); ori_shape = src_node_->GetOpDesc()->MutableOutputDesc(src_index_)->GetOriginShape();
GELOGI("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str()); GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str());
return SUCCESS; return SUCCESS;
} }
} // namespace hybrid } // namespace hybrid

@ -45,16 +45,16 @@ class ShapeFuture {
struct ShapeInferenceState { struct ShapeInferenceState {
explicit ShapeInferenceState(const NodeItem &node_item); explicit ShapeInferenceState(const NodeItem &node_item);
void UpdateInputShape(uint32_t idx, const GeShape &ori_shape, const GeShape &shape); Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape);
void UpdateInputShapeFuture(uint32_t idx, ShapeFuture &&future); void UpdateInputShapeFuture(int idx, ShapeFuture &&future);
Status AwaitShapesReady(const GraphExecutionContext &context); Status AwaitShapesReady(const GraphExecutionContext &context);
const NodeItem &node_item; const NodeItem &node_item;
private: private:
std::vector<std::pair<uint32_t, ShapeFuture>> shape_futures; std::vector<std::pair<int, ShapeFuture>> shape_futures;
int num_pending_shapes_ = 0; int num_pending_shapes_ = 0;
std::condition_variable ready_cv_; std::condition_variable ready_cv_;
std::mutex mu_; std::mutex mu_;

@ -36,13 +36,13 @@ Status LogInputs(const NodeItem &node_item, const TaskContext &task_context) {
for (auto i = 0; i < task_context.NumInputs(); ++i) { for (auto i = 0; i < task_context.NumInputs(); ++i) {
const auto &input_tensor = task_context.GetInput(i); const auto &input_tensor = task_context.GetInput(i);
GE_CHECK_NOTNULL(input_tensor); GE_CHECK_NOTNULL(input_tensor);
const auto &tensor_desc = node_item.op_desc->MutableInputDesc(i); const auto &tensor_desc = task_context.GetInputDesc(i);
GE_CHECK_NOTNULL(tensor_desc); GE_CHECK_NOTNULL(tensor_desc);
GELOGD("[%s] Print task args. input[%d] = %s, shape = [%s]", GELOGD("[%s] Print task args. input[%d] = %s, shape = [%s]",
node_item.NodeName().c_str(), node_item.NodeName().c_str(),
i, i,
input_tensor->DebugString().c_str(), input_tensor->DebugString().c_str(),
tensor_desc->MutableShape().ToString().c_str()); tensor_desc->GetShape().ToString().c_str());
} }
return SUCCESS; return SUCCESS;
@ -52,7 +52,7 @@ Status LogOutputs(const NodeItem &node_item, const TaskContext &task_context) {
for (auto i = 0; i < task_context.NumOutputs(); ++i) { for (auto i = 0; i < task_context.NumOutputs(); ++i) {
const auto &output_tensor = task_context.GetOutput(i); const auto &output_tensor = task_context.GetOutput(i);
GE_CHECK_NOTNULL(output_tensor); GE_CHECK_NOTNULL(output_tensor);
const auto &tensor_desc = node_item.op_desc->MutableOutputDesc(i); const auto &tensor_desc = node_item.MutableOutputDesc(i);
GE_CHECK_NOTNULL(tensor_desc); GE_CHECK_NOTNULL(tensor_desc);
GELOGD("[%s] Print task args. output[%d] = %s, shape = [%s]", GELOGD("[%s] Print task args. output[%d] = %s, shape = [%s]",
node_item.NodeName().c_str(), node_item.NodeName().c_str(),
@ -97,7 +97,7 @@ Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) {
GE_CHECK_NOTNULL(output_tensor); GE_CHECK_NOTNULL(output_tensor);
Tensor tensor; Tensor tensor;
auto ge_tensor_desc = node_item.op_desc->MutableOutputDesc(output_idx); auto ge_tensor_desc = node_item.MutableOutputDesc(output_idx);
GE_CHECK_NOTNULL(ge_tensor_desc); GE_CHECK_NOTNULL(ge_tensor_desc);
tensor.SetTensorDesc(TensorAdapter::GeTensorDesc2TensorDesc(*ge_tensor_desc)); tensor.SetTensorDesc(TensorAdapter::GeTensorDesc2TensorDesc(*ge_tensor_desc));
@ -107,7 +107,7 @@ Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) {
if (output_tensor->GetSize() < static_cast<size_t>(tensor_size)) { if (output_tensor->GetSize() < static_cast<size_t>(tensor_size)) {
GELOGE(INTERNAL_ERROR, GELOGE(INTERNAL_ERROR,
"[%s] Tensor size is not enough. output index = %d, required size = %zu, tensor = %s", "[%s] Tensor size is not enough. output index = %d, required size = %ld, tensor = %s",
node_item.NodeName().c_str(), node_item.NodeName().c_str(),
output_idx, output_idx,
tensor_size, tensor_size,
@ -453,7 +453,7 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const
continue; continue;
} }
const auto &tensor_desc = node_state.GetOpDesc()->MutableInputDesc(i); const auto &tensor_desc = task_context.MutableInputDesc(i);
GE_CHECK_NOTNULL(tensor_desc); GE_CHECK_NOTNULL(tensor_desc);
if (tensor_desc->GetDataType() == DT_STRING) { if (tensor_desc->GetDataType() == DT_STRING) {
GELOGD("[%s] Skipping DT_STRING input, index = %d", task_context.GetNodeName(), i); GELOGD("[%s] Skipping DT_STRING input, index = %d", task_context.GetNodeName(), i);

@ -142,12 +142,15 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
dst_input_index_and_node.first); dst_input_index_and_node.first);
// in case type 3 and 4, shape will be valid after computing is done // in case type 3 and 4, shape will be valid after computing is done
auto &infer_state = dst_node_state->GetShapeInferenceState();
if (shape_is_future) { if (shape_is_future) {
ShapeFuture future(node_item.node, i, subgraph_context_); ShapeFuture future(node_item.node, i, subgraph_context_);
dst_node_state->GetShapeInferenceState().UpdateInputShapeFuture(dst_input_index_and_node.first, infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first,
std::move(future)); std::move(future));
} else { } else {
dst_node_state->GetShapeInferenceState().UpdateInputShape(dst_input_index_and_node.first, ori_shape, shape); GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first,
ori_shape,
shape));
} }
} }
} }
@ -159,7 +162,7 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
Status ShapeInferenceEngine::InferShapeForSubgraph(const NodeItem &node_item, const FusedSubgraph &fused_subgraph) { Status ShapeInferenceEngine::InferShapeForSubgraph(const NodeItem &node_item, const FusedSubgraph &fused_subgraph) {
GELOGD("[%s] Start to infer shape by fused subgraph", node_item.NodeName().c_str()); GELOGD("[%s] Start to infer shape by fused subgraph", node_item.NodeName().c_str());
for (auto &it : fused_subgraph.input_mapping) { for (auto &it : fused_subgraph.input_mapping) {
auto parent_tensor_desc = node_item.op_desc->MutableInputDesc(it.first); auto parent_tensor_desc = node_item.MutableInputDesc(it.first);
GE_CHECK_NOTNULL(parent_tensor_desc); GE_CHECK_NOTNULL(parent_tensor_desc);
GELOGD("Start to update shape by input[%u]", it.first); GELOGD("Start to update shape by input[%u]", it.first);
GELOGD("Update shape to [%s]", parent_tensor_desc->GetShape().ToString().c_str()); GELOGD("Update shape to [%s]", parent_tensor_desc->GetShape().ToString().c_str());

@ -40,11 +40,11 @@ Status GraphItem::GetOutputDescList(vector<ConstGeTensorDescPtr> &output_desc_li
} }
if (is_dynamic_) { if (is_dynamic_) {
for (auto &tensor_desc : output_node_->op_desc->GetAllInputsDescPtr()) { for (auto &tensor_desc : output_node_->GetOpDesc()->GetAllInputsDescPtr()) {
output_desc_list.emplace_back(tensor_desc); output_desc_list.emplace_back(tensor_desc);
} }
} else { } else {
for (auto &tensor_desc : output_node_->op_desc->GetAllOutputsDescPtr()) { for (auto &tensor_desc : output_node_->GetOpDesc()->GetAllOutputsDescPtr()) {
output_desc_list.emplace_back(tensor_desc); output_desc_list.emplace_back(tensor_desc);
} }
} }

@ -44,7 +44,7 @@ Status HybridModel::Init() {
TensorValue* HybridModel::GetVariable(const string &name) const { TensorValue* HybridModel::GetVariable(const string &name) const {
auto it = variable_tensors_.find(name); auto it = variable_tensors_.find(name);
if (it == variable_tensors_.end()) { if (it == variable_tensors_.end()) {
GELOGI("Failed to get variable tensor. var name = [%s]", name.c_str()); GELOGD("Failed to get variable tensor. var name = [%s]", name.c_str());
return nullptr; return nullptr;
} }
@ -61,7 +61,7 @@ NodePtr HybridModel::GetVariableNode(const string &name) const {
if (host_find != host_variable_nodes_.end()) { if (host_find != host_variable_nodes_.end()) {
return host_find->second; return host_find->second;
} }
GELOGI("Failed to get variable node by name = [%s]", name.c_str()); GELOGD("Failed to get variable node by name = [%s]", name.c_str());
return nullptr; return nullptr;
} }

@ -19,14 +19,12 @@
#include "common/math/math_util.h" #include "common/math/math_util.h"
#include "graph/ge_context.h" #include "graph/ge_context.h"
#include "graph/build/memory/var_mem_assign_util.h" #include "graph/build/memory/var_mem_assign_util.h"
#include "graph/utils/node_utils.h"
#include "graph/debug/ge_attr_define.h" #include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/model_utils.h" #include "graph/load/new_model_manager/model_utils.h"
#include "graph/manager/graph_var_manager.h" #include "graph/manager/graph_var_manager.h"
#include "graph/manager/host_mem_manager.h" #include "graph/manager/host_mem_manager.h"
#include "graph/manager/trans_var_data_utils.h" #include "graph/manager/trans_var_data_utils.h"
#include "graph/utils/graph_utils.h" #include "graph/utils/graph_utils.h"
#include "graph/utils/type_utils.h"
#include "hybrid/common/npu_memory_allocator.h" #include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/node_executor/node_executor.h" #include "hybrid/node_executor/node_executor.h"
@ -44,20 +42,14 @@ int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) {
auto data_type = desc.GetDataType(); auto data_type = desc.GetDataType();
if (data_type == DT_STRING) { if (data_type == DT_STRING) {
(void) TensorUtils::GetSize(desc, var_size); (void) TensorUtils::GetSize(desc, var_size);
} else { return var_size;
var_size = GetSizeByDataType(data_type);
if (var_size <= 0) {
GELOGW("Failed to calc var data size from data type %s", TypeUtils::DataTypeToSerialString(data_type).c_str());
return -1;
}
auto shape = desc.GetShape();
auto dim_num = shape.GetDimNum();
for (size_t dim_index = 0; dim_index < dim_num; ++dim_index) {
var_size *= shape.GetDim(dim_index);
}
// padding up to multiple of kAlignment, and add extra kAlignment
var_size = (var_size + kAlignment * 2 - 1) / kAlignment * kAlignment;
} }
if (TensorUtils::GetTensorMemorySizeInBytes(desc, var_size) != GRAPH_SUCCESS) {
GELOGW("Failed to calc var data size");
return -1;
}
return var_size; return var_size;
} }
@ -150,7 +142,12 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_ite
GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item), GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item),
"[%s] Failed to get or create node item.", "[%s] Failed to get or create node item.",
dst_node->GetName().c_str()); dst_node->GetName().c_str());
node_item.outputs[i].emplace_back(dst_in_anchor->GetIdx(), dst_node_item); int canonical_index;
GE_CHK_STATUS_RET(dst_node_item->GetCanonicalInputIndex(dst_in_anchor->GetIdx(), canonical_index),
"[%s] Failed to canonical input index",
dst_node->GetName().c_str());
node_item.outputs[i].emplace_back(canonical_index, dst_node_item);
} }
} }
@ -171,7 +168,8 @@ Status HybridModelBuilder::ResolveRefIo(NodeItem &node_item) {
for (auto &output : outputs) { for (auto &output : outputs) {
for (auto &input : inputs) { for (auto &input : inputs) {
if (input.first == output.first) { if (input.first == output.first) {
auto input_idx = static_cast<int>(input.second); int input_idx;
GE_CHK_STATUS_RET_NOLOG(node_item.GetCanonicalInputIndex(input.second, input_idx));
auto output_idx = static_cast<int>(output.second); auto output_idx = static_cast<int>(output.second);
node_item.reuse_inputs[output_idx] = input_idx; node_item.reuse_inputs[output_idx] = input_idx;
GELOGD("[%s] Output[%d] reuse input[%d]", node_item.NodeName().c_str(), output_idx, input_idx); GELOGD("[%s] Output[%d] reuse input[%d]", node_item.NodeName().c_str(), output_idx, input_idx);
@ -190,10 +188,8 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n
return SUCCESS; return SUCCESS;
} }
auto new_node = std::unique_ptr<NodeItem>(new(std::nothrow) NodeItem(node)); std::unique_ptr<NodeItem> new_node;
GE_CHECK_NOTNULL(new_node); GE_CHK_STATUS_RET(NodeItem::Create(node, new_node), "Failed to create node item");
GE_CHECK_NOTNULL(new_node->op_desc);
GE_CHK_STATUS_RET(new_node->Init(), "Failed to init NodeItem [%s] .", node->GetName().c_str());
GE_CHK_STATUS_RET_NOLOG(NodeExecutorManager::GetInstance().GetExecutor(*node, &new_node->node_executor)); GE_CHK_STATUS_RET_NOLOG(NodeExecutorManager::GetInstance().GetExecutor(*node, &new_node->node_executor));
// we do not need L2 Buffer // we do not need L2 Buffer
@ -202,10 +198,6 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n
(void) AttrUtils::SetBool(new_node->op_desc, kIsFirstNode, false); (void) AttrUtils::SetBool(new_node->op_desc, kIsFirstNode, false);
(void) AttrUtils::SetBool(new_node->op_desc, kIsLastNode, false); (void) AttrUtils::SetBool(new_node->op_desc, kIsLastNode, false);
if (new_node->is_dynamic && (new_node->IsControlOp() || new_node->NodeType() == PARTITIONEDCALL)) {
new_node->shape_inference_type = DEPEND_COMPUTE;
}
new_node->node_id = node_index; new_node->node_id = node_index;
new_node->op_desc->SetId(node_index); new_node->op_desc->SetId(node_index);
node_index += 1; node_index += 1;
@ -446,7 +438,6 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) {
if (src_out_anchor == nullptr || src_out_anchor->GetOwnerNode() == nullptr) { if (src_out_anchor == nullptr || src_out_anchor->GetOwnerNode() == nullptr) {
continue; continue;
} }
auto src_node = wrapped_node_in_anchor->GetPeerOutAnchor()->GetOwnerNode();
wrapped_node_in_anchor->UnlinkAll(); wrapped_node_in_anchor->UnlinkAll();
// link src to outputs of DataNode // link src to outputs of DataNode
@ -454,6 +445,7 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) {
GE_CHECK_NOTNULL(out_data_anchor); GE_CHECK_NOTNULL(out_data_anchor);
for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
auto dst_node = peer_in_data_anchor->GetOwnerNode(); auto dst_node = peer_in_data_anchor->GetOwnerNode();
GE_CHECK_NOTNULL(dst_node);
root_nodes.emplace(dst_node); root_nodes.emplace(dst_node);
GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(out_data_anchor, peer_in_data_anchor)); GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(out_data_anchor, peer_in_data_anchor));
GE_CHK_STATUS_RET_NOLOG(DoLinkDataAnchors(src_out_anchor, peer_in_data_anchor)); GE_CHK_STATUS_RET_NOLOG(DoLinkDataAnchors(src_out_anchor, peer_in_data_anchor));
@ -496,6 +488,7 @@ Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) {
for (const auto &in_data_anchor : net_output_node->GetAllInDataAnchors()) { for (const auto &in_data_anchor : net_output_node->GetAllInDataAnchors()) {
auto src_out_anchor = in_data_anchor->GetPeerOutAnchor(); auto src_out_anchor = in_data_anchor->GetPeerOutAnchor();
GE_CHECK_NOTNULL(src_out_anchor); GE_CHECK_NOTNULL(src_out_anchor);
GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode());
GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(src_out_anchor, in_data_anchor)); GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(src_out_anchor, in_data_anchor));
auto index = in_data_anchor->GetIdx(); auto index = in_data_anchor->GetIdx();
@ -519,6 +512,7 @@ Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) {
continue; continue;
} }
GE_CHECK_NOTNULL(dst_in_anchor->GetOwnerNode());
GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(parent_out_anchor, dst_in_anchor)); GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(parent_out_anchor, dst_in_anchor));
GE_CHK_STATUS_RET_NOLOG(DoLinkDataAnchors(src_out_anchor, dst_in_anchor)); GE_CHK_STATUS_RET_NOLOG(DoLinkDataAnchors(src_out_anchor, dst_in_anchor));
} }
@ -628,8 +622,7 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph,
Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item, Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item,
const NodeItem &node_item, const NodeItem &node_item,
bool is_root_graph) { bool is_root_graph) {
auto output_size = node_item.op_desc->GetAllInputsSize(); auto output_size = node_item.num_inputs;
GE_CHECK_LE(output_size, UINT32_MAX);
graph_item.output_edges_.resize(output_size); graph_item.output_edges_.resize(output_size);
for (auto &in_data_anchor : node_item.node->GetAllInDataAnchors()) { for (auto &in_data_anchor : node_item.node->GetAllInDataAnchors()) {
@ -640,14 +633,16 @@ Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item,
auto src_node_item = GetNodeItem(src_node); auto src_node_item = GetNodeItem(src_node);
GE_CHECK_NOTNULL(src_node_item); GE_CHECK_NOTNULL(src_node_item);
auto output_idx = in_data_anchor->GetIdx();
auto output_offset = src_node_item->output_start + peer_out_anchor->GetIdx(); auto output_offset = src_node_item->output_start + peer_out_anchor->GetIdx();
GELOGI("Output[%d], node = %s, output_index = %d, output_offset = %d ", GELOGI("Output[%d], node = %s, output_index = %d, output_offset = %d ",
in_data_anchor->GetIdx(), output_idx,
src_node_item->NodeName().c_str(), src_node_item->NodeName().c_str(),
peer_out_anchor->GetIdx(), peer_out_anchor->GetIdx(),
output_offset); output_offset);
graph_item.output_edges_[in_data_anchor->GetIdx()] = {src_node_item, peer_out_anchor->GetIdx()}; GE_CHECK_LE(output_idx, output_size - 1);
graph_item.output_edges_[output_idx] = {src_node_item, peer_out_anchor->GetIdx()};
} }
if (!is_root_graph) { if (!is_root_graph) {
@ -820,6 +815,10 @@ Status HybridModelBuilder::InitConstantOps() {
const NodePtr &var_node = it.second; const NodePtr &var_node = it.second;
auto op_desc = var_node->GetOpDesc(); auto op_desc = var_node->GetOpDesc();
auto v_weights = ModelUtils::GetWeights(op_desc); auto v_weights = ModelUtils::GetWeights(op_desc);
if (v_weights.empty()) {
GELOGE(INTERNAL_ERROR, "[%s] Constant no not have value", var_node->GetName().c_str());
return INTERNAL_ERROR;
}
auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get()); auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get());
std::unique_ptr<TensorValue> var_tensor; std::unique_ptr<TensorValue> var_tensor;
@ -884,6 +883,7 @@ Status HybridModelBuilder::InitVariableTensors() {
GELOGD("Host variable [%s] malloc success.", it.first.c_str()); GELOGD("Host variable [%s] malloc success.", it.first.c_str());
std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size)); std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size));
GE_CHECK_NOTNULL(tensor);
hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor));
} }
@ -931,7 +931,7 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr
GELOGD("Set ge_model for subgraph: [%s], task_size = %d", GELOGD("Set ge_model for subgraph: [%s], task_size = %d",
sub_graph.GetName().c_str(), sub_graph.GetName().c_str(),
ge_model->GetModelTaskDefPtr()->task_size()); ge_model->GetModelTaskDefPtr()->task_size());
hybrid_model_.known_shape_sub_models_.emplace(sub_graph.GetParentNode(), ge_model); hybrid_model_.known_shape_sub_models_.emplace(parent_node, ge_model);
} }
return SUCCESS; return SUCCESS;
@ -1098,7 +1098,7 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
GE_CHECK_NOTNULL(net_output_desc); GE_CHECK_NOTNULL(net_output_desc);
auto out_index = static_cast<uint32_t>(src_wrapped_node_out_anchor->GetIdx()); auto out_index = static_cast<uint32_t>(src_wrapped_node_out_anchor->GetIdx());
GELOGD("src graph = %s, src parent output index = %d", src_graph->GetName().c_str(), out_index); GELOGD("src graph = %s, src parent output index = %u", src_graph->GetName().c_str(), out_index);
// link src to outputs of DataNode // link src to outputs of DataNode
auto input_size = net_output_desc->GetAllInputsSize(); auto input_size = net_output_desc->GetAllInputsSize();
@ -1237,7 +1237,8 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) {
uint32_t parent_index = 0; uint32_t parent_index = 0;
GE_CHK_STATUS_RET_NOLOG(GetParentNodeOutputIndex(*net_output_desc, in_data_anchor->GetIdx(), parent_index)); GE_CHK_STATUS_RET_NOLOG(GetParentNodeOutputIndex(*net_output_desc, in_data_anchor->GetIdx(), parent_index));
GELOGD("Got parent output index = %u", parent_index); GELOGD("Got parent output index = %u", parent_index);
node_item.ref_outputs.emplace(parent_index, src_node); GE_CHECK_LE(parent_index, INT32_MAX);
node_item.ref_outputs.emplace(static_cast<int>(parent_index), src_node);
} }
// Data nodes marked with REF_VAR_SRC_VAR_NAME // Data nodes marked with REF_VAR_SRC_VAR_NAME

@ -26,9 +26,9 @@
namespace ge { namespace ge {
namespace hybrid { namespace hybrid {
namespace { namespace {
// File-local constants (declared inside the anonymous namespace opened above).
// The two revisions shown on each line differ only in whitespace.
// Attribute name string "_original_fusion_graph".
const char * const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; const char *const kAttrNameOriginalFusionGraph = "_original_fusion_graph";
// Node type string "_RetVal".
const char * const kNodeTypeRetVal = "_RetVal"; const char *const kNodeTypeRetVal = "_RetVal";
// Op types treated as control ops; IsControlOp() tests membership in this set.
std::set<std::string> kControlOpTypes { std::set<std::string> kControlOpTypes{
IF, STATELESSIF, CASE, WHILE, STATELESSWHILE IF, STATELESSIF, CASE, WHILE, STATELESSWHILE
}; };
@ -54,7 +54,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr
return SUCCESS; return SUCCESS;
} }
Status ParseOutputMapping(OpDescPtr op_desc, FusedSubgraph &fused_subgraph) { Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgraph) {
uint32_t parent_index = 0; uint32_t parent_index = 0;
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(FAILED, GELOGE(FAILED,
@ -74,7 +74,7 @@ Status ParseFusedSubgraph(NodeItem &node_item) {
} }
GELOGI("[%s] Start to parse fused subgraph.", node_item.node_name.c_str()); GELOGI("[%s] Start to parse fused subgraph.", node_item.node_name.c_str());
auto fused_subgraph = std::unique_ptr<FusedSubgraph>(new (std::nothrow)FusedSubgraph()); auto fused_subgraph = std::unique_ptr<FusedSubgraph>(new(std::nothrow)FusedSubgraph());
GE_CHECK_NOTNULL(fused_subgraph); GE_CHECK_NOTNULL(fused_subgraph);
ComputeGraphPtr fused_graph; ComputeGraphPtr fused_graph;
@ -110,19 +110,39 @@ bool IsControlOp(const std::string &op_type) {
return kControlOpTypes.count(op_type) > 0; return kControlOpTypes.count(op_type) > 0;
} }
NodeItem::NodeItem(NodePtr node): node(std::move(node)) { NodeItem::NodeItem(NodePtr node) : node(std::move(node)) {
this->op_desc = this->node->GetOpDesc().get(); this->op_desc = this->node->GetOpDesc().get();
this->node_id = this->op_desc->GetId();
this->num_inputs = this->op_desc->GetInputsSize();
this->num_outputs = this->op_desc->GetOutputsSize();
this->node_name = this->node->GetName(); this->node_name = this->node->GetName();
this->node_type = this->node->GetType(); this->node_type = this->node->GetType();
} }
// Factory for NodeItem: validates `node`, constructs the item and runs Init() on it.
// On the success path `node_item` takes ownership of the new instance and SUCCESS is returned.
// NOTE(review): GE_CHECK_NOTNULL / GE_CHK_STATUS_RET are assumed to return early with an
// error status when their check fails, leaving `node_item` untouched - confirm against
// the macro definitions.
Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_item) {
GE_CHECK_NOTNULL(node);
// The op desc is checked here, before a NodeItem is constructed from `node`.
GE_CHECK_NOTNULL(node->GetOpDesc());
// nothrow new: an allocation failure yields a null pointer, which is checked on the next line.
std::unique_ptr<NodeItem> instance(new(std::nothrow)NodeItem(node));
GE_CHECK_NOTNULL(instance);
GE_CHK_STATUS_RET(instance->Init(), "Failed to init NodeItem [%s] .", node->GetName().c_str());
// Hand the instance to the caller only after the Init() status has been checked.
node_item = std::move(instance);
return SUCCESS;
}
Status NodeItem::Init() { Status NodeItem::Init() {
int32_t unknown_shape_type_val = 0; GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); num_inputs = static_cast<int>(op_desc->GetInputsSize());
num_outputs = static_cast<int>(op_desc->GetOutputsSize());
if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) {
has_optional_inputs = true;
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
const auto &input_desc = op_desc->MutableInputDesc(i);
if (input_desc == nullptr) {
GELOGD("[%s] Input[%zu] is optional and invalid", NodeName().c_str(), i);
} else {
input_desc_indices_.emplace_back(static_cast<uint32_t>(i));
}
}
}
(void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
@ -132,16 +152,15 @@ Status NodeItem::Init() {
node->GetName().c_str()); node->GetName().c_str());
} }
GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str());
if (is_dynamic) { if (is_dynamic) {
for (int i = 0; i < num_inputs; ++i) { for (int i = 0; i < num_inputs; ++i) {
const auto &input_desc = op_desc->MutableInputDesc(i); const auto &input_desc = MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc); GE_CHECK_NOTNULL(input_desc);
if (input_desc->MutableShape().IsUnknownShape()) { if (input_desc->MutableShape().IsUnknownShape()) {
is_input_shape_static.push_back(false); is_input_shape_static_.push_back(false);
} else { } else {
num_static_input_shapes++; num_static_input_shapes++;
is_input_shape_static.push_back(true); is_input_shape_static_.push_back(true);
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
} }
@ -155,6 +174,16 @@ Status NodeItem::Init() {
break; break;
} }
} }
if (IsControlOp() || node_type == PARTITIONEDCALL) {
shape_inference_type = DEPEND_COMPUTE;
} else {
int32_t unknown_shape_type_val = 0;
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
}
GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str());
} }
return SUCCESS; return SUCCESS;
@ -186,7 +215,7 @@ std::string NodeItem::DebugString() const {
for (auto &items : outputs) { for (auto &items : outputs) {
ss << ", output[" << index++ << "]: "; ss << ", output[" << index++ << "]: ";
for (auto &item : items) { for (auto &item : items) {
ss << "(" << item.second->NodeName() << ":" <<item.first << "), "; ss << "(" << item.second->NodeName() << ":" << item.first << "), ";
} }
} }
@ -196,13 +225,60 @@ std::string NodeItem::DebugString() const {
void NodeItem::SetToDynamic() { void NodeItem::SetToDynamic() {
num_static_input_shapes = 0; num_static_input_shapes = 0;
is_dynamic = true; is_dynamic = true;
for (size_t i = 0; i < is_input_shape_static.size(); ++i) { for (size_t i = 0; i < is_input_shape_static_.size(); ++i) {
is_input_shape_static[i] = false; is_input_shape_static_[i] = false;
} }
if (kernel_task != nullptr && !kernel_task->IsSupportDynamicShape()) { if (kernel_task != nullptr && !kernel_task->IsSupportDynamicShape()) {
GELOGD("[%s] Dynamic shape is not supported, clear node task.", node_name.c_str()); GELOGD("[%s] Dynamic shape is not supported, clear node task.", node_name.c_str());
kernel_task = nullptr; kernel_task = nullptr;
} }
} }
// Returns the tensor desc for input `index` of this node.
//
// Without optional inputs, `index` is forwarded unchanged to the OpDesc.
// With optional inputs, `index` is a position in input_desc_indices_ and is
// translated to the stored OpDesc input index before the lookup.
//
// Returns nullptr (after logging PARAM_INVALID) when `index` is out of range in
// the optional-input case.
GeTensorDescPtr NodeItem::MutableInputDesc(int index) const {
  if (!has_optional_inputs) {
    return op_desc->MutableInputDesc(static_cast<uint32_t>(index));
  }

  // num_inputs is a public, mutable field, so the lookup is additionally bounded
  // by the size of the table that is actually subscripted below.
  const bool out_of_range = index < 0 || index >= num_inputs ||
                            static_cast<size_t>(index) >= input_desc_indices_.size();
  if (out_of_range) {
    GELOGE(PARAM_INVALID,
           "[%s] Invalid input index, num inputs = %d, index = %d",
           node_name.c_str(),
           num_inputs,
           index);
    return nullptr;
  }

  return op_desc->MutableInputDesc(input_desc_indices_[static_cast<size_t>(index)]);
}
// Maps an OpDesc input index to the index used by this NodeItem.
//
// Without optional inputs the two numberings are identical, so `index` is
// returned as-is (after checking that it fits in the signed output type).
// With optional inputs the result is the position of `index` inside
// input_desc_indices_.
//
// @param index            input index to translate
// @param canonical_index  receives the translated index; written only on SUCCESS
// @return SUCCESS on success; PARAM_INVALID if `index` does not fit in an int;
//         INTERNAL_ERROR if `index` is not present in input_desc_indices_
Status NodeItem::GetCanonicalInputIndex(uint32_t index, int &canonical_index) const {
  if (!has_optional_inputs) {
    // canonical_index is a signed int: reject values that would wrap to a negative index.
    if (index > static_cast<uint32_t>(INT32_MAX)) {
      GELOGE(PARAM_INVALID, "[%s] Invalid input index: %u", node_name.c_str(), index);
      return PARAM_INVALID;
    }
    canonical_index = static_cast<int>(index);
    return SUCCESS;
  }

  const auto iter = std::find(input_desc_indices_.begin(), input_desc_indices_.end(), index);
  if (iter == input_desc_indices_.end()) {
    GELOGE(INTERNAL_ERROR, "[%s] Invalid input index: %u", node_name.c_str(), index);
    return INTERNAL_ERROR;
  }

  canonical_index = static_cast<int>(iter - input_desc_indices_.begin());
  GELOGD("[%s] Canonicalize input index from [%u] to [%d]", node_name.c_str(), index, canonical_index);
  return SUCCESS;
}
// Tells whether the shape of input `index` is static.
// A node that is not dynamic reports every input as static. For a dynamic node the
// answer comes from is_input_shape_static_; an index outside that table is logged
// as PARAM_INVALID and reported as not static.
bool NodeItem::IsInputShapeStatic(int index) const {
  if (is_dynamic) {
    // Converting to size_t first makes a negative index fail the range test too.
    const auto position = static_cast<size_t>(index);
    const size_t tracked = is_input_shape_static_.size();
    if (position < tracked) {
      return is_input_shape_static_[position];
    }
    GELOGE(PARAM_INVALID, "Input index(%d) out of range: [0, %zu)", index, tracked);
    return false;
  }
  return true;
}
} // namespace hybrid } // namespace hybrid
} // namespace ge } // namespace ge

@ -40,10 +40,8 @@ bool IsControlOp(const std::string &op_type);
// for caching static information across execution // for caching static information across execution
struct NodeItem { struct NodeItem {
explicit NodeItem(NodePtr node);
~NodeItem() = default; ~NodeItem() = default;
static Status Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_item);
Status Init();
const std::string &NodeName() const { const std::string &NodeName() const {
return node_name; return node_name;
@ -53,6 +51,20 @@ struct NodeItem {
return node_type; return node_type;
} }
OpDescPtr GetOpDesc() const {
return node->GetOpDesc();
}
bool IsInputShapeStatic(int index) const;
GeTensorDescPtr MutableOutputDesc(int index) const {
return op_desc->MutableOutputDesc(static_cast<uint32_t>(index));
}
GeTensorDescPtr MutableInputDesc(int index) const;
Status GetCanonicalInputIndex(uint32_t index, int &canonical_index) const;
bool IsControlOp() const; bool IsControlOp() const;
void SetToDynamic(); void SetToDynamic();
@ -61,14 +73,15 @@ struct NodeItem {
NodePtr node; NodePtr node;
OpDesc *op_desc; OpDesc *op_desc;
int node_id; int node_id = -1;
int num_inputs; int num_inputs = 0;
int num_outputs; int num_outputs = 0;
int input_start = -1; int input_start = -1;
int output_start = -1; int output_start = -1;
bool is_dynamic = false; bool is_dynamic = false;
bool has_observer = false; bool has_observer = false;
bool has_optional_inputs = false;
bool is_output_shape_static = true;
UnknowShapeOpType shape_inference_type = DEPEND_IN_SHAPE; UnknowShapeOpType shape_inference_type = DEPEND_IN_SHAPE;
std::string node_name; std::string node_name;
std::string node_type; std::string node_type;
@ -76,9 +89,8 @@ struct NodeItem {
std::vector<ge::NodePtr> dependents_for_execution; std::vector<ge::NodePtr> dependents_for_execution;
std::set<int> to_const_output_id_list; std::set<int> to_const_output_id_list;
vector<NodeItem *> inputs;
// src_output_id, dst_anchor_id, dst_node // src_output_id, dst_anchor_id, dst_node
vector<vector<pair<uint32_t, NodeItem *>>> outputs; vector<vector<pair<int, NodeItem *>>> outputs;
std::shared_ptr<NodeTask> kernel_task; std::shared_ptr<NodeTask> kernel_task;
std::unique_ptr<FusedSubgraph> fused_subgraph; std::unique_ptr<FusedSubgraph> fused_subgraph;
@ -86,10 +98,14 @@ struct NodeItem {
std::map<int, ge::NodePtr> ref_outputs; std::map<int, ge::NodePtr> ref_outputs;
std::map<int, int> reuse_inputs; std::map<int, int> reuse_inputs;
std::map<int, int> reuse_outputs; std::map<int, int> reuse_outputs;
std::vector<bool> is_input_shape_static;
bool is_output_shape_static = true;
int num_static_input_shapes = 0; int num_static_input_shapes = 0;
private:
explicit NodeItem(NodePtr node);
Status Init();
std::vector<bool> is_input_shape_static_;
std::vector<uint32_t> input_desc_indices_;
}; };
} // namespace hybrid } // namespace hybrid
} // namespace ge } // namespace ge

@ -157,9 +157,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
return ret; return ret;
} }
auto op_desc = context.GetNodeItem().op_desc; GELOGI("[%s] ExecuteAsync Start.", context.GetNodeName());
GE_CHECK_NOTNULL(op_desc);
GELOGI("[%s] ExecuteAsync Start.", op_desc->GetName().c_str());
for (auto it = tasks_.begin(); it != tasks_.end(); ++it) { for (auto it = tasks_.begin(); it != tasks_.end(); ++it) {
// AtomicAddrClean has 2 tasks // AtomicAddrClean has 2 tasks
if (tasks_.size() == 2 && it == tasks_.begin() && !(*(tasks_.rbegin()))->GetClearAtomic()) { if (tasks_.size() == 2 && it == tasks_.begin() && !(*(tasks_.rbegin()))->GetClearAtomic()) {
@ -177,15 +175,13 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeRegisterCallback] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeRegisterCallback] End");
} }
GELOGD("[%s] ExecuteAsync End.", op_desc->GetName().c_str()); GELOGD("[%s] ExecuteAsync End.", context.GetNodeName());
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeTaskExecuteAsync] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeTaskExecuteAsync] End");
return SUCCESS; return SUCCESS;
} }
Status AiCoreNodeTask::UpdateArgs(TaskContext &context) { Status AiCoreNodeTask::UpdateArgs(TaskContext &context) {
auto op_desc = context.GetNodeItem().op_desc; GELOGI("[%s] AiCoreNodeTask UpdateArgs Start.", context.GetNodeName());
GE_CHECK_NOTNULL(op_desc);
GELOGI("[%s] AiCoreNodeTask UpdateArgs Start.", op_desc->GetName().c_str());
for (auto it = tasks_.rbegin(); it != tasks_.rend(); ++it) { for (auto it = tasks_.rbegin(); it != tasks_.rend(); ++it) {
GE_CHK_STATUS_RET_NOLOG((*it)->UpdateArgs(context)); GE_CHK_STATUS_RET_NOLOG((*it)->UpdateArgs(context));
// AtomicAddrClean has 2 tasks // AtomicAddrClean has 2 tasks
@ -193,7 +189,7 @@ Status AiCoreNodeTask::UpdateArgs(TaskContext &context) {
break; break;
} }
} }
GELOGI("[%s] AiCoreNodeTask UpdateArgs End.", op_desc->GetName().c_str()); GELOGI("[%s] AiCoreNodeTask UpdateArgs End.", context.GetNodeName());
return SUCCESS; return SUCCESS;
} }

@ -37,7 +37,10 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
ext_info_.reset(new(std::nothrow)uint8_t[ext_info_len_]); ext_info_.reset(new(std::nothrow)uint8_t[ext_info_len_]);
GE_CHECK_NOTNULL(ext_info_); GE_CHECK_NOTNULL(ext_info_);
(void) memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()); if (memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()) != EOK) {
GELOGE(FAILED, "[%s] Failed to coy ext info", node_name_.c_str());
return FAILED;
}
input_shape_and_type_.clear(); input_shape_and_type_.clear();
output_shape_and_type_.clear(); output_shape_and_type_.clear();
@ -94,7 +97,7 @@ Status AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) {
auto need_len = input_num_ * sizeof(AicpuShapeAndType); auto need_len = input_num_ * sizeof(AicpuShapeAndType);
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID, GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID,
"Node[%s] parse ext input shape failed as infoLen must be " "Node[%s] parse ext input shape failed as infoLen must be "
"input_num[%zu]*sizeof(ShapeAndType)[%zu] but %u.", "input_num[%u]*sizeof(ShapeAndType)[%zu] but %u.",
node_name_.c_str(), input_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen); node_name_.c_str(), input_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen);
auto input = reinterpret_cast<AicpuShapeAndType *>(aicpu_ext_info->infoMsg); auto input = reinterpret_cast<AicpuShapeAndType *>(aicpu_ext_info->infoMsg);
@ -115,7 +118,7 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) {
auto need_len = output_num_ * sizeof(AicpuShapeAndType); auto need_len = output_num_ * sizeof(AicpuShapeAndType);
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID, GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID,
"Node[%s] parse ext output shape failed as infoLen must be " "Node[%s] parse ext output shape failed as infoLen must be "
"output_num[%zu]*sizeof(ShapeAndType)[%zu] but %u.", "output_num[%u]*sizeof(ShapeAndType)[%zu] but %u.",
node_name_.c_str(), output_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen); node_name_.c_str(), output_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen);
auto output = reinterpret_cast<AicpuShapeAndType *>(aicpu_ext_info->infoMsg); auto output = reinterpret_cast<AicpuShapeAndType *>(aicpu_ext_info->infoMsg);

File diff suppressed because it is too large Load Diff

@ -31,14 +31,12 @@ REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::COMPILED_SUBGR
Status KnownNodeTask:: ExecuteAsync(TaskContext &context, std::function<void()> done_callback) { Status KnownNodeTask:: ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTaskExecuteAsync] Start"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTaskExecuteAsync] Start");
GELOGI("[%s] KnownNodeTask::ExecuteAsync in.", context.GetNodeName()); GELOGD("[%s] KnownNodeTask::ExecuteAsync in.", context.GetNodeName());
if (davinci_model_->GetTaskList().size() == 0) { if (davinci_model_->GetTaskList().empty()) {
GELOGW("KnownNodeExecutor::ExecuteAsync davinci moel has no taskinfo."); GELOGW("KnownNodeExecutor::ExecuteAsync davinci model has no taskinfo.");
// todo if data is connected to netoutput, forward address ? copy data? // todo if data is connected to netoutput, forward address ? copy data?
if (context.NumInputs() == context.NumOutputs()){ if (context.NumInputs() == context.NumOutputs()){
GELOGW("[%s] KnownNodeExecutor::ExecuteAsync davinci moel has no taskinfo.",
context.GetNodeName());
for (int i = 0; i < context.NumInputs(); ++i) { for (int i = 0; i < context.NumInputs(); ++i) {
auto tensor = context.MutableInput(i); auto tensor = context.MutableInput(i);
GE_CHECK_NOTNULL(tensor); GE_CHECK_NOTNULL(tensor);
@ -54,24 +52,22 @@ Status KnownNodeTask:: ExecuteAsync(TaskContext &context, std::function<void()
} }
rtError_t rt_ret; rtError_t rt_ret;
GELOGI("rtModelExecute start.");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodertModelExecute] Start"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodertModelExecute] Start");
rt_ret = rtModelExecute(davinci_model_->GetRtModelHandle(), context.GetStream(), 0); rt_ret = rtModelExecute(davinci_model_->GetRtModelHandle(), context.GetStream(), 0);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(rt_ret, "rtModelExecute error, ret: hybrid_model_executorOx%X", rt_ret); return FAILED;); GELOGE(rt_ret, "rtModelExecute error, ret: hybrid_model_executorOx%X", rt_ret); return FAILED;);
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodertModelExecute] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodertModelExecute] End");
GELOGI("rtModelExecute end");
GE_CHK_STATUS_RET_NOLOG(context.RegisterCallback(done_callback)); GE_CHK_STATUS_RET_NOLOG(context.RegisterCallback(done_callback));
GELOGI("[%s] KnownNodeTask::ExecuteAsync success.", context.GetNodeName()); GELOGD("[%s] KnownNodeTask::ExecuteAsync success.", context.GetNodeName());
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTaskExecuteAsync] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTaskExecuteAsync] End");
return SUCCESS; return SUCCESS;
} }
Status KnownNodeTask::UpdateArgs(TaskContext &context) { Status KnownNodeTask::UpdateArgs(TaskContext &context) {
GELOGI("[%s] KnownNodeExecutor::UpdateArgs in.", context.GetNodeName()); GELOGD("[%s] KnownNodeExecutor::UpdateArgs in.", context.GetNodeName());
if (davinci_model_->GetTaskList().size() == 0) { if (davinci_model_->GetTaskList().empty()) {
GELOGW("KnownNodeExecutor::UpdateArgs davinci moel has no taskinfo."); GELOGW("KnownNodeExecutor::UpdateArgs davinci model has no taskinfo.");
return SUCCESS; return SUCCESS;
} }
@ -91,7 +87,7 @@ Status KnownNodeTask::UpdateArgs(TaskContext &context) {
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownNodeArgs(inputs, outputs), GE_CHK_STATUS_RET(davinci_model_->UpdateKnownNodeArgs(inputs, outputs),
"known node task update known node args failed."); "known node task update known node args failed.");
GELOGI("[%s] KnownNodeExecutor::UpdateArgs success, task_size = %d:", context.GetNodeName(), GELOGD("[%s] KnownNodeExecutor::UpdateArgs success, task_size = %zu", context.GetNodeName(),
davinci_model_->GetTaskList().size()); davinci_model_->GetTaskList().size());
return SUCCESS; return SUCCESS;
} }
@ -123,7 +119,7 @@ Status KnownNodeTask::Init(TaskContext &context) {
davinci_model_->SetKnownNodeAddrNotChanged(addr_not_changed); davinci_model_->SetKnownNodeAddrNotChanged(addr_not_changed);
// update mem base // update mem base
davinci_model_->UpdateMemBase(static_cast<uint8_t *>(buffer)); davinci_model_->UpdateMemBase(static_cast<uint8_t *>(buffer));
GELOGI("KnownNodeTask::Init mem base is %p, size %u.", GELOGI("KnownNodeTask::Init mem base is %p, size %lu.",
davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size);
} }
if (!load_flag_) { if (!load_flag_) {
@ -138,7 +134,7 @@ Status KnownNodeTask::Init(TaskContext &context) {
} }
Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const {
GELOGI("[%s] KnownNodeExecutor::PrepareTask in.", context.GetNodeName()); GELOGD("[%s] KnownNodeExecutor::PrepareTask in.", context.GetNodeName());
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorPrepareTask] Start"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorPrepareTask] Start");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorTaskInit] Start"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorTaskInit] Start");
GE_CHK_STATUS_RET(task.Init(context), "known node init davinci model failed."); GE_CHK_STATUS_RET(task.Init(context), "known node init davinci model failed.");
@ -148,7 +144,7 @@ Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) cons
GE_CHK_STATUS_RET(task.UpdateArgs(context), "known node task update args failed."); GE_CHK_STATUS_RET(task.UpdateArgs(context), "known node task update args failed.");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorUpdateArgs] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorUpdateArgs] End");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorPrepareTask] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorPrepareTask] End");
GELOGI("[%s] KnownNodeExecutor::PrepareTask success.", context.GetNodeName()); GELOGD("[%s] KnownNodeExecutor::PrepareTask success.", context.GetNodeName());
return SUCCESS; return SUCCESS;
} }
@ -167,7 +163,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node
davinci_model->SetKnownNode(true); davinci_model->SetKnownNode(true);
// set model id as root node's node id // set model id as root node's node id
davinci_model->SetId(node->GetOpDesc()->GetId()); davinci_model->SetId(node->GetOpDesc()->GetId());
GELOGD("KnownNodeExecutor::LoadTask node id %u.", node->GetOpDesc()->GetId()); GELOGD("KnownNodeExecutor::LoadTask node id %ld.", node->GetOpDesc()->GetId());
GE_CHK_STATUS_RET(davinci_model->Assign(ge_model), "KnownNodeExecutor::LoadTask davincimodel assign failed."); GE_CHK_STATUS_RET(davinci_model->Assign(ge_model), "KnownNodeExecutor::LoadTask davincimodel assign failed.");

@ -119,6 +119,7 @@ Status IfOpNodeTask::Init(const NodePtr &node, const HybridModel &model) {
Status IfOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::function<void()> &done_callback) const { Status IfOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::function<void()> &done_callback) const {
auto cond_tensor_desc = task_context.MutableInputDesc(kIfCondIndex); auto cond_tensor_desc = task_context.MutableInputDesc(kIfCondIndex);
GE_CHECK_NOTNULL(cond_tensor_desc);
auto data_type = cond_tensor_desc->GetDataType(); auto data_type = cond_tensor_desc->GetDataType();
const auto &shape = cond_tensor_desc->MutableShape(); const auto &shape = cond_tensor_desc->MutableShape();
bool cond_val = false; bool cond_val = false;
@ -362,14 +363,16 @@ Status WhileOpNodeTask::MoveOutputs2Inputs(TaskContext &task_context) {
*input_tensor = *output_tensor; *input_tensor = *output_tensor;
output_tensor->Destroy(); output_tensor->Destroy();
auto input_tensor_desc = task_context.MutableInputDesc(i);
GE_CHECK_NOTNULL(input_tensor_desc);
auto output_tensor_desc = task_context.MutableOutputDesc(i); auto output_tensor_desc = task_context.MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_tensor_desc); GE_CHECK_NOTNULL(output_tensor_desc);
GELOGD("[%s] To update input shape[%d] by output shape. from [%s] to [%s]", GELOGD("[%s] To update input shape[%d] by output shape. from [%s] to [%s]",
task_context.GetNodeName(), task_context.GetNodeName(),
i, i,
task_context.MutableInputDesc(i)->GetShape().ToString().c_str(), input_tensor_desc->GetShape().ToString().c_str(),
output_tensor_desc->GetShape().ToString().c_str()); output_tensor_desc->GetShape().ToString().c_str());
*task_context.MutableInputDesc(i) = *output_tensor_desc; *input_tensor_desc = *output_tensor_desc;
} }
return SUCCESS; return SUCCESS;

@ -67,14 +67,16 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do
} }
const NodeItem &node_item = context.GetNodeItem(); const NodeItem &node_item = context.GetNodeItem();
const OpDescPtr op_desc = MakeShared<OpDesc>(*(node_item.op_desc)); const OpDescPtr op_desc = node_item.GetOpDesc();
GE_CHECK_NOTNULL(op_desc); GE_CHECK_NOTNULL(op_desc);
HcomOpertion op_info; HcomOpertion op_info;
op_info.hcclType = op_desc->GetType(); op_info.hcclType = op_desc->GetType();
op_info.inputPtr = inputs.empty() ? nullptr : inputs[0]; op_info.inputPtr = inputs.empty() ? nullptr : inputs[0];
op_info.outputPtr = outputs.empty() ? nullptr : outputs[0]; op_info.outputPtr = outputs.empty() ? nullptr : outputs[0];
ge::DataType src_data_type = op_desc->GetInputDescPtr(0)->GetDataType(); auto input_desc = node_item.MutableInputDesc(0);
GE_CHECK_NOTNULL(input_desc);
ge::DataType src_data_type = input_desc->GetDataType();
auto iter = kConstOpHcclDataType.find(static_cast<int64_t>(src_data_type)); auto iter = kConstOpHcclDataType.find(static_cast<int64_t>(src_data_type));
if (iter == kConstOpHcclDataType.end()) { if (iter == kConstOpHcclDataType.end()) {
GELOGE(PARAM_INVALID, "kConstOpHcclDataType find failed."); GELOGE(PARAM_INVALID, "kConstOpHcclDataType find failed.");
@ -128,8 +130,9 @@ Status RdmaNodeTask::UpdateArgs(TaskContext &context) { return SUCCESS; }
Status RdmaNodeTask::Init(TaskContext &context) { Status RdmaNodeTask::Init(TaskContext &context) {
GELOGI("[%s] RdmaNodeTask::Init in.", context.GetNodeName()); GELOGI("[%s] RdmaNodeTask::Init in.", context.GetNodeName());
const NodeItem &node_item = context.GetNodeItem(); const NodeItem &node_item = context.GetNodeItem();
GE_CHECK_NOTNULL(node_item.op_desc); auto op_desc = node_item.GetOpDesc();
auto remote_idx = node_item.op_desc->GetInputIndexByName("remote"); GE_CHECK_NOTNULL(op_desc);
auto remote_idx = op_desc->GetInputIndexByName("remote");
auto in_data_anchor = node_item.node->GetInDataAnchor(remote_idx); auto in_data_anchor = node_item.node->GetInDataAnchor(remote_idx);
GE_CHECK_NOTNULL(in_data_anchor); GE_CHECK_NOTNULL(in_data_anchor);
auto out_data_anchor = in_data_anchor->GetPeerOutAnchor(); auto out_data_anchor = in_data_anchor->GetPeerOutAnchor();
@ -141,7 +144,7 @@ Status RdmaNodeTask::Init(TaskContext &context) {
if (node_item.node->GetType() == HCOMREMOTEREAD) { if (node_item.node->GetType() == HCOMREMOTEREAD) {
local_index_ = 0; local_index_ = 0;
} else { } else {
local_index_ = node_item.op_desc->GetInputIndexByName("local"); local_index_ = op_desc->GetInputIndexByName("local");
} }
return SUCCESS; return SUCCESS;
} }

@ -47,7 +47,9 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) {
std::vector<ConstGeTensorPtr> inputs; std::vector<ConstGeTensorPtr> inputs;
for (int32_t i = 0; i < context.NumInputs(); ++i) { for (int32_t i = 0; i < context.NumInputs(); ++i) {
const auto &input_desc = op_desc->GetInputDesc(i); auto input_desc_ptr = context.GetInputDesc(i);
GE_CHECK_NOTNULL(input_desc_ptr);
const auto &input_desc = *input_desc_ptr;
GE_CHECK_NOTNULL(context.GetInput(i)); GE_CHECK_NOTNULL(context.GetInput(i));
auto in_tensor = MakeShared<GeTensor>(input_desc, auto in_tensor = MakeShared<GeTensor>(input_desc,
reinterpret_cast<const uint8_t *>(context.GetInput(i)->GetData()), reinterpret_cast<const uint8_t *>(context.GetInput(i)->GetData()),
@ -56,8 +58,7 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) {
in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType());
in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape()); in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape());
inputs.emplace_back(in_tensor); inputs.emplace_back(in_tensor);
GELOGI("node:%s allocate input %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, GELOGI("node:%s allocate input %d, size=%zu", op_desc->GetName().c_str(), i, in_tensor->GetData().size());
reinterpret_cast<const uint8_t *>(in_tensor->GetData().data()), in_tensor->GetData().size());
} }
std::vector<GeTensorPtr> outputs; std::vector<GeTensorPtr> outputs;
@ -78,8 +79,7 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) {
out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType());
out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape());
outputs.emplace_back(out_tensor); outputs.emplace_back(out_tensor);
GELOGI("node:%s allocate output %d, addr=%p, size=%zu", op_desc->GetName().c_str(), i, GELOGI("node:%s allocate output %d, size=%zu", op_desc->GetName().c_str(), i, out_tensor->GetData().size());
reinterpret_cast<const uint8_t *>(out_tensor->GetData().data()), out_tensor->GetData().size());
} }
return HostCpuEngine::GetInstance().Run(node_, inputs, outputs); return HostCpuEngine::GetInstance().Run(node_, inputs, outputs);

@ -16,6 +16,7 @@
#include "hybrid/node_executor/node_executor.h" #include "hybrid/node_executor/node_executor.h"
#include "framework/common/debug/log.h" #include "framework/common/debug/log.h"
#include "common/math/math_util.h"
#include "graph/utils/node_utils.h" #include "graph/utils/node_utils.h"
#include "init/gelib.h" #include "init/gelib.h"
#include "graph/utils/tensor_utils.h" #include "graph/utils/tensor_utils.h"
@ -138,8 +139,9 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const {
GELOGD("[%s] Skipping CalcOpRunningParam for PartitionedCall.", node.GetName().c_str()); GELOGD("[%s] Skipping CalcOpRunningParam for PartitionedCall.", node.GetName().c_str());
return SUCCESS; return SUCCESS;
} }
for (size_t i = 0; i < node.GetOpDesc()->GetOutputsSize(); ++i) { for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) {
GeTensorDescPtr output_tensor = op_desc->MutableOutputDesc(static_cast<uint32_t>(i)); GeTensorDescPtr output_tensor = op_desc->MutableOutputDesc(static_cast<uint32_t>(i));
GE_CHECK_NOTNULL(output_tensor);
TensorUtils::SetSize(*(output_tensor.get()), 0); TensorUtils::SetSize(*(output_tensor.get()), 0);
} }
@ -155,6 +157,10 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const {
int64_t output_mem_size = 0; int64_t output_mem_size = 0;
GE_CHK_STATUS_RET(TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size), GE_CHK_STATUS_RET(TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size),
"hccl calc tensor mem size failed."); "hccl calc tensor mem size failed.");
GE_CHK_STATUS_RET(CheckInt64AddOverflow(output_mem_size, MEMORY_ALIGN_RATIO * MEMORY_ALIGN_SIZE - 1),
"[%s] Invalid output mem size: %ld",
node.GetName().c_str(),
output_mem_size);
output_mem_size = ((output_mem_size + output_mem_size = ((output_mem_size +
MEMORY_ALIGN_RATIO * MEMORY_ALIGN_SIZE - 1) / MEMORY_ALIGN_SIZE) * MEMORY_ALIGN_SIZE; MEMORY_ALIGN_RATIO * MEMORY_ALIGN_SIZE - 1) / MEMORY_ALIGN_SIZE) * MEMORY_ALIGN_SIZE;
TensorUtils::SetSize(output_tensor, output_mem_size); TensorUtils::SetSize(output_tensor, output_mem_size);

@ -253,7 +253,7 @@ Status TaskContext::AllocateOutput(int index,
Status TaskContext::AllocateOutputs(AllocationAttr *attr) { Status TaskContext::AllocateOutputs(AllocationAttr *attr) {
for (int i = 0; i < node_item_->num_outputs; ++i) { for (int i = 0; i < node_item_->num_outputs; ++i) {
const auto &output_desc = node_item_->op_desc->MutableOutputDesc(i); const auto &output_desc = node_item_->MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_desc); GE_CHECK_NOTNULL(output_desc);
uint32_t mem_type = 0; uint32_t mem_type = 0;
(void)AttrUtils::GetInt(output_desc, ATTR_OUTPUT_MEMORY_TYPE, mem_type); (void)AttrUtils::GetInt(output_desc, ATTR_OUTPUT_MEMORY_TYPE, mem_type);
@ -349,7 +349,7 @@ Status TaskContext::PropagateOutputs() {
auto dst_input_idx = dst_input_index_and_node.first; auto dst_input_idx = dst_input_index_and_node.first;
auto dst_node_item = dst_input_index_and_node.second; auto dst_node_item = dst_input_index_and_node.second;
auto input_offset = dst_node_item->input_start + dst_input_idx; auto input_offset = dst_node_item->input_start + dst_input_idx;
GELOGI( GELOGD(
"Propagate output of node %s, output index = %d, dst node = %s, " "Propagate output of node %s, output index = %d, dst node = %s, "
"dst_input_index = %d, dst_input_offset = %d.", "dst_input_index = %d, dst_input_offset = %d.",
node_item_->NodeName().c_str(), node_item_->NodeName().c_str(),
@ -394,20 +394,20 @@ void TaskContext::ReleaseInput(int index) {
} }
} }
ConstGeTensorDescPtr TaskContext::GetOutputDesc(int index) { ConstGeTensorDescPtr TaskContext::GetOutputDesc(int index) const {
return node_item_->op_desc->MutableOutputDesc(static_cast<uint32_t>(index)); return node_item_->MutableOutputDesc(static_cast<uint32_t>(index));
} }
ConstGeTensorDescPtr TaskContext::GetInputDesc(int index) { ConstGeTensorDescPtr TaskContext::GetInputDesc(int index) const {
return node_item_->op_desc->MutableInputDesc(static_cast<uint32_t>(index)); return node_item_->MutableInputDesc(index);
} }
GeTensorDescPtr TaskContext::MutableInputDesc(int index) { GeTensorDescPtr TaskContext::MutableInputDesc(int index) const {
return node_item_->op_desc->MutableInputDesc(static_cast<uint32_t>(index)); return node_item_->MutableInputDesc(index);
} }
GeTensorDescPtr TaskContext::MutableOutputDesc(int index) { GeTensorDescPtr TaskContext::MutableOutputDesc(int index) const {
return node_item_->op_desc->MutableOutputDesc(static_cast<uint32_t>(index)); return node_item_->MutableOutputDesc(static_cast<uint32_t>(index));
} }
bool TaskContext::IsForceInferShape() const { bool TaskContext::IsForceInferShape() const {

@ -46,10 +46,10 @@ class TaskContext {
const NodeItem &GetNodeItem() const; const NodeItem &GetNodeItem() const;
const char *GetNodeName() const; const char *GetNodeName() const;
TensorValue *MutableInput(int index); TensorValue *MutableInput(int index);
ConstGeTensorDescPtr GetInputDesc(int index); ConstGeTensorDescPtr GetInputDesc(int index) const;
ConstGeTensorDescPtr GetOutputDesc(int index); ConstGeTensorDescPtr GetOutputDesc(int index) const;
GeTensorDescPtr MutableInputDesc(int index); GeTensorDescPtr MutableInputDesc(int index) const;
GeTensorDescPtr MutableOutputDesc(int index); GeTensorDescPtr MutableOutputDesc(int index) const;
void ReleaseInput(int index); void ReleaseInput(int index);
const TensorValue *GetInput(int index) const; const TensorValue *GetInput(int index) const;
const TensorValue *GetOutput(int index) const; const TensorValue *GetOutput(int index) const;

Loading…
Cancel
Save