!1188 dynamic shape overflow

From: @zhou_chao1993
Reviewed-by: @xchu42, @ji_chen
Signed-off-by: @ji_chen
pull/1188/MERGE
Committed by mindspore-ci-bot via Gitee, 4 years ago
commit 1b845b9ac2

@ -103,6 +103,7 @@ set(TRAIN_SRC_LIST
"common/profiling/profiling_manager.cc"
"common/dump/dump_manager.cc"
"common/dump/dump_properties.cc"
"common/dump/opdebug_register.cc"
"common/dump/dump_op.cc"
"common/profiling/ge_profiling.cc"
"common/profiling/ge_runner_profiling.cc"
@ -427,6 +428,7 @@ set(INFER_SRC_LIST
"common/dump/dump_properties.cc"
"common/dump/dump_manager.cc"
"common/dump/dump_op.cc"
"common/dump/opdebug_register.cc"
"common/dump/dump_server.cc"
"common/helper/model_cache_helper.cc"
"ge_local_engine/engine/host_cpu_engine.cc"

@ -104,8 +104,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties(
uint64_t session_id) {
std::lock_guard<std::mutex> lock(mutex_);
- // If session_id is not found in dump_properties_map_, operator[] will insert one.
- return dump_properties_map_[session_id];
+ auto iter = dump_properties_map_.find(session_id);
+ if (iter != dump_properties_map_.end()) {
+ return iter->second;
+ }
+ static DumpProperties default_properties;
+ return default_properties;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpManager::AddDumpProperties(
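
The hunk above is the behavioral fix in this file: `std::map::operator[]` default-constructs and inserts an entry for every key it does not find, so a read-only query for an unknown session_id silently grew dump_properties_map_ under the lock. The find-based version reads without mutating and falls back to a function-local static default, which also keeps the returned reference valid. A minimal standalone sketch of the difference (plain C++, not GE code; `Properties` stands in for `DumpProperties`):

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Stand-in for ge::DumpProperties.
struct Properties {
  std::string path;
};

int main() {
  std::map<uint64_t, Properties> props;
  props[1] = {"/tmp/dump"};

  // operator[] default-constructs and inserts a value for a missing key,
  // so every lookup with an unknown session id grows the map.
  Properties &inserted = props[42];
  std::cout << "size after operator[]: " << props.size() << '\n';  // 2

  // find() only reads; misses fall back to a shared default instance,
  // the pattern the patched GetDumpProperties() uses.
  static Properties default_props;
  auto it = props.find(43);
  const Properties &found = (it != props.end()) ? it->second : default_props;
  std::cout << "size after find: " << props.size() << '\n';  // still 2
  (void)inserted;
  (void)found;
  return 0;
}
```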

@ -219,9 +219,9 @@ Status DumpOp::LaunchDumpOp() {
op_mapping_info.set_dump_path(dump_path);
op_mapping_info.set_flag(kAicpuLoadFlag);
op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
- if (!dynamic_model_name_.empty()) {
- op_mapping_info.set_model_name(dynamic_model_name_);
- op_mapping_info.set_model_id(dynamic_model_id_);
+ if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) {
+ op_mapping_info.set_model_name(dynamic_model_name_);
}
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
@ -253,7 +253,7 @@ Status DumpOp::LaunchDumpOp() {
}
op_mapping_info.mutable_task()->Add(std::move(task));
}
- if (dump_properties_.GetDumpMode() == kDumpAll) {
+ if (dump_properties_.GetDumpMode() == kDumpAll || dump_properties_.IsOpDebugOpen()) {
auto ret = DumpOutput(task);
if (ret != SUCCESS) {
GELOGE(ret, "Dump output failed when in dumping all");

@ -81,11 +81,11 @@ class DumpProperties {
const std::string &GetEnableDumpDebug() const {return enable_dump_debug_;}
private:
void CopyFrom(const DumpProperties &other);
void SetDumpDebugOptions();
std::string enable_dump_;
std::string enable_dump_debug_;

@ -0,0 +1,148 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "opdebug_register.h"
namespace {
const size_t kOpDebugMemorySize = 2048UL;
const size_t kDebugP2pSize = 8UL;
} // namespace
namespace ge {
OpdebugRegister::~OpdebugRegister() {}
Status OpdebugRegister::RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper) {
GELOGD("Start to register debug for model in overflow");
auto ret = MallocMemForOpdebug();
if (ret != SUCCESS) {
GELOGE(ret, "Malloc memory for opdebug in model overflow failed ,ret:0x%X", ret);
return ret;
}
uint32_t debug_stream_id = 0;
uint32_t debug_task_id = 0;
auto rt_ret = rtDebugRegister(model_handle, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("debug_task_id:%u, debug_stream_id:%u in model overflow", debug_task_id, debug_stream_id);
data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
return SUCCESS;
}
void OpdebugRegister::UnregisterDebugForModel(rtModel_t model_handle) {
rtError_t rt_ret = RT_ERROR_NONE;
if (model_handle != nullptr) {
GELOGD("start to call rtDebugUnRegister in model overflow.");
rt_ret = rtDebugUnRegister(model_handle);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret);
}
}
if (op_debug_addr_ != nullptr) {
rt_ret = rtFree(op_debug_addr_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtFree failed, ret: 0x%X", rt_ret);
}
op_debug_addr_ = nullptr;
}
if (p2p_debug_addr_ != nullptr) {
rt_ret = rtFree(p2p_debug_addr_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtFree failed, ret: 0x%X", rt_ret);
}
p2p_debug_addr_ = nullptr;
}
return;
}
Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper) {
GELOGD("Start to register debug for stream in stream overflow");
auto ret = MallocMemForOpdebug();
if (ret != SUCCESS) {
GELOGE(ret, "Malloc memory for opdebug in stream overflow ,ret:0x%X", ret);
return ret;
}
uint32_t debug_stream_id = 0;
uint32_t debug_task_id = 0;
#ifdef ONLY_COMPILE_OPEN_SRC
auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
#endif
GELOGD("debug_task_id:%u, debug_stream_id:%u in stream overflow.", debug_task_id, debug_stream_id);
data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
return SUCCESS;
}
void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
rtError_t rt_ret = RT_ERROR_NONE;
#ifdef ONLY_COMPILE_OPEN_SRC
if (stream != nullptr) {
GELOGD("start call rtDebugUnRegisterForStream in unknown shape over flow.");
rt_ret = rtDebugUnRegisterForStream(stream);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtDebugUnRegisterForStream failed, ret: 0x%X", rt_ret);
}
}
#endif
if (op_debug_addr_ != nullptr) {
rt_ret = rtFree(op_debug_addr_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtFree failed, ret: 0x%X", rt_ret);
}
op_debug_addr_ = nullptr;
}
if (p2p_debug_addr_ != nullptr) {
rt_ret = rtFree(p2p_debug_addr_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtFree failed, ret: 0x%X", rt_ret);
}
p2p_debug_addr_ = nullptr;
}
return;
}
Status OpdebugRegister::MallocMemForOpdebug() {
rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_));
// For data dump, aicpu needs the pointer to pointer that saves the real debug address.
rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
return SUCCESS;
}
} // namespace ge
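
MallocMemForOpdebug wires up one level of indirection: a 2 KB buffer that receives the overflow records, plus an 8-byte cell holding that buffer's address, which aicpu dereferences to locate the records (hence the pointer-to-pointer comment above). A host-only sketch of the same wiring, with `malloc`/`memcpy` standing in for `rtMalloc`/`rtMemcpy` and ordinary heap memory standing in for DDR and HBM:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

int main() {
  // Stand-ins for the two rtMalloc'd regions: a 2 KB overflow record
  // buffer (DDR on device) and an 8-byte address cell (HBM on device).
  const std::size_t kOpDebugMemorySize = 2048;
  void *op_debug_addr = std::malloc(kOpDebugMemorySize);
  void *p2p_debug_addr = std::malloc(sizeof(uint64_t));

  // Store the record buffer's address into the cell, mirroring the
  // rtMemcpy(HOST_TO_DEVICE) in MallocMemForOpdebug; aicpu dereferences
  // the cell to find where the overflow records live.
  uint64_t addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr));
  std::memcpy(p2p_debug_addr, &addr, sizeof(uint64_t));

  uint64_t read_back = 0;
  std::memcpy(&read_back, p2p_debug_addr, sizeof(uint64_t));
  std::printf("cell points at %p\n",
              reinterpret_cast<void *>(static_cast<uintptr_t>(read_back)));

  std::free(p2p_debug_addr);
  std::free(op_debug_addr);
  return 0;
}
```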

@ -0,0 +1,44 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GE_COMMON_DUMP_OPDEBUG_REGISTER_H_
#define GE_COMMON_DUMP_OPDEBUG_REGISTER_H_
#include <map>
#include "common/debug/ge_log.h"
#include "common/debug/log.h"
#include "graph/load/model_manager/data_dumper.h"
namespace ge {
class OpdebugRegister {
public:
OpdebugRegister() = default;
~OpdebugRegister();
Status RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper);
void UnregisterDebugForModel(rtModel_t model_handle);
Status RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper);
void UnregisterDebugForStream(rtStream_t stream);
private:
Status MallocMemForOpdebug();
void *op_debug_addr_ = nullptr;
void *p2p_debug_addr_ = nullptr;
};
} // namespace ge
#endif // GE_COMMON_DUMP_OPDEBUG_REGISTER_H_
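
Worth noting: `~OpdebugRegister()` is empty, so the buffers allocated by MallocMemForOpdebug are released only inside UnregisterDebugForModel/UnregisterDebugForStream, and every owner must pair a successful register with an unregister (DavinciModel and HybridModelAsyncExecutor do this via an `is_op_debug_reg_` flag). A hedged RAII sketch of that pairing, with `puts` standing in for the runtime calls; the real class leaves the unregister explicit:

```cpp
#include <cstdio>

// Minimal RAII pairing; Register()/Unregister() stand in for
// RegisterDebugFor{Model,Stream} / UnregisterDebugFor{Model,Stream}.
class DebugScope {
 public:
  explicit DebugScope(bool op_debug_open) {
    if (op_debug_open) {
      std::puts("register debug");    // allocate buffers, rtDebugRegister
      registered_ = true;             // mirrors is_op_debug_reg_
    }
  }
  ~DebugScope() {
    if (registered_) {
      std::puts("unregister debug");  // rtDebugUnRegister, free buffers
    }
  }

 private:
  bool registered_ = false;
};

int main() {
  DebugScope scope(true);  // unregisters automatically on scope exit
  return 0;
}
```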

@ -17,6 +17,7 @@ set(SRC_LIST
"../common/dump/dump_properties.cc"
"../common/dump/dump_manager.cc"
"../common/dump/dump_op.cc"
"../common/dump/opdebug_register.cc"
"../common/profiling/ge_profiling.cc"
"../graph/load/graph_loader.cc"
"../graph/execute/graph_execute.cc"

@ -36,21 +36,9 @@
namespace ge {
class DataDumper {
public:
- explicit DataDumper(const RuntimeParam &rsh)
- : model_name_(),
- model_id_(0),
- runtime_param_(rsh),
- dev_mem_load_(nullptr),
- dev_mem_unload_(nullptr),
- op_list_(),
- input_map_(),
- load_flag_(false),
- device_id_(0),
- global_step_(0),
- loop_per_iter_(0),
- loop_cond_(0),
- compute_graph_(nullptr),
- ref_info_() {}
+ DataDumper() : runtime_param_{} {}
+ explicit DataDumper(const RuntimeParam &rsh) : runtime_param_(rsh) {}
~DataDumper();
@ -105,10 +93,10 @@ class DataDumper {
// for inference data dump
std::string om_name_;
- uint32_t model_id_;
+ uint32_t model_id_ = 0;
const RuntimeParam &runtime_param_;
- void *dev_mem_load_;
- void *dev_mem_unload_;
+ void *dev_mem_load_ = nullptr;
+ void *dev_mem_unload_ = nullptr;
struct InnerDumpInfo;
struct InnerInputMapping;
@ -119,16 +107,15 @@ class DataDumper {
uint32_t end_graph_stream_id_ = 0;
bool is_end_graph_ = false;
std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init
- bool load_flag_;
- uint32_t device_id_;
- uintptr_t global_step_;
- uintptr_t loop_per_iter_;
- uintptr_t loop_cond_;
- ComputeGraphPtr compute_graph_; // release after DavinciModel::Init
+ bool load_flag_ = false;
+ uint32_t device_id_ = 0;
+ uintptr_t global_step_ = 0;
+ uintptr_t loop_per_iter_ = 0;
+ uintptr_t loop_cond_ = 0;
+ ComputeGraphPtr compute_graph_ = nullptr; // release after DavinciModel::Init
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init
void *l1_fusion_addr_ = nullptr;
uint32_t op_debug_task_id_ = 0;
uint32_t op_debug_stream_id_ = 0;
void *op_debug_addr_ = nullptr;
@ -150,14 +137,10 @@ class DataDumper {
void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr,
aicpu::dump::OpMappingInfo &op_mapping_info);
Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info);
- Status GenerateInput(aicpu::dump::Input &input,
- const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
- const uintptr_t &addr,
- size_t index);
- Status GenerateOutput(aicpu::dump::Output &output,
- const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
- const uintptr_t &addr,
- size_t index);
+ Status GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
+ const uintptr_t &addr, size_t index);
+ Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
+ const uintptr_t &addr, size_t index);
void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task);
};
struct DataDumper::InnerDumpInfo {
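
The constructor cleanup above moves thirteen init-list entries onto the member declarations, so the new default constructor and the existing RuntimeParam constructor share one set of defaults. One caveat worth flagging: `runtime_param_` is declared as `const RuntimeParam &`, and `DataDumper() : runtime_param_{}` binds that reference to a temporary that dies when the constructor returns, leaving it dangling; the sketch below sidesteps this by using a value member. Names are illustrative, not the real DataDumper:

```cpp
#include <cstdint>

struct RuntimeParam {
  int dummy = 0;
};

class Dumper {
 public:
  Dumper() = default;                                              // NSDMI defaults apply
  explicit Dumper(const RuntimeParam &rp) : runtime_param_(rp) {}  // same defaults otherwise
  bool Loaded() const { return load_flag_; }

 private:
  RuntimeParam runtime_param_;    // a const reference in the real class; a value here
  uint32_t model_id_ = 0;         // defaults live with the declarations,
  void *dev_mem_load_ = nullptr;  // so every constructor picks them up
  void *dev_mem_unload_ = nullptr;
  bool load_flag_ = false;
  uint32_t device_id_ = 0;
  uintptr_t global_step_ = 0;
};

int main() {
  RuntimeParam rp;
  Dumper by_default;    // all members take their declared defaults
  Dumper by_param(rp);  // only runtime_param_ differs
  return by_default.Loaded() == by_param.Loaded() ? 0 : 1;
}
```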

@ -232,6 +232,8 @@ DavinciModel::~DavinciModel() {
FreeP2PMem();
+ OpDebugUnRegister();
if (l1_fusion_addr_ != nullptr) {
GE_CHK_RT(rtFree(l1_fusion_addr_));
}
@ -242,8 +244,6 @@ DavinciModel::~DavinciModel() {
}
}
- OpDebugUnRegister();
ReleaseTask();
CleanTbeHandle();
@ -568,77 +568,21 @@ Status DavinciModel::SetTSDevice() {
}
Status DavinciModel::OpDebugRegister() {
- bool is_op_debug = false;
- (void)ge::AttrUtils::GetBool(ge_model_, ATTR_OP_DEBUG_FLAG, is_op_debug);
- GELOGD("The value of op debug in ge_model is %d.", is_op_debug);
- if (is_op_debug) {
- debug_reg_mutex_.lock();
- rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
- if (rt_ret != RT_ERROR_NONE) {
- GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
- return RT_ERROR_TO_GE_STATUS(rt_ret);
- }
- uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_));
- // For data dump, aicpu needs the pointer to pointer that saves the real debug address.
- rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
- if (rt_ret != RT_ERROR_NONE) {
- GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
- return RT_ERROR_TO_GE_STATUS(rt_ret);
- }
- rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
- if (rt_ret != RT_ERROR_NONE) {
- GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
- return RT_ERROR_TO_GE_STATUS(rt_ret);
- }
- uint32_t op_debug_mode = 0;
- (void)ge::AttrUtils::GetInt(ge_model_, ATTR_OP_DEBUG_MODE, op_debug_mode);
- GELOGD("The value of op_debug_mode in ge_model_ is %u.", op_debug_mode);
- uint32_t debug_task_id = 0;
- uint32_t debug_stream_id = 0;
- rt_ret = rtDebugRegister(rt_model_handle_, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
- if (rt_ret != RT_ERROR_NONE) {
- GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
- return RT_ERROR_TO_GE_STATUS(rt_ret);
+ if (GetDumpProperties().IsOpDebugOpen()) {
+ uint32_t op_debug_mode = GetDumpProperties().GetOpDebugMode();
+ auto ret = opdebug_register_.RegisterDebugForModel(rt_model_handle_, op_debug_mode, data_dumper_);
+ if (ret != SUCCESS) {
+ GELOGE(ret, "Register known shape op debug failed, ret: 0x%X", ret);
+ return ret;
}
- GELOGI("debug_task_id:%d, debug_stream_id:%u", debug_task_id, debug_stream_id);
is_op_debug_reg_ = true;
- data_dumper_.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, is_op_debug);
}
return SUCCESS;
}
void DavinciModel::OpDebugUnRegister() {
if (is_op_debug_reg_) {
- debug_reg_mutex_.unlock();
- rtError_t rt_ret = RT_ERROR_NONE;
- if (rt_model_handle_ != nullptr) {
- GELOGD("start call debug_unregister.");
- rt_ret = rtDebugUnRegister(rt_model_handle_);
- if (rt_ret != RT_ERROR_NONE) {
- GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret);
- }
- }
- if (op_debug_addr_ != nullptr) {
- rt_ret = rtFree(op_debug_addr_);
- if (rt_ret != RT_ERROR_NONE) {
- GELOGW("rtFree failed, ret: 0x%X", rt_ret);
- }
- op_debug_addr_ = nullptr;
- }
- if (p2p_debug_addr_ != nullptr) {
- rt_ret = rtFree(p2p_debug_addr_);
- if (rt_ret != RT_ERROR_NONE) {
- GELOGW("rtFree failed, ret: 0x%X", rt_ret);
- }
- p2p_debug_addr_ = nullptr;
- }
+ opdebug_register_.UnregisterDebugForModel(rt_model_handle_);
is_op_debug_reg_ = false;
}
return;

@ -29,6 +29,7 @@
#include "common/helper/om_file_helper.h"
#include "common/opskernel/ge_task_info.h"
#include "common/properties_manager.h"
#include "common/dump/opdebug_register.h"
#include "common/types.h"
#include "framework/common/util.h"
#include "graph/debug/ge_attr_define.h"
@ -984,6 +985,7 @@ class DavinciModel {
int64_t maxDumpOpNum_;
// for data dump
DataDumper data_dumper_;
OpdebugRegister opdebug_register_;
uint64_t iterator_count_;
bool is_l1_fusion_enable_;
map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init
@ -1021,8 +1023,6 @@ class DavinciModel {
// for op debug
mutex debug_reg_mutex_;
bool is_op_debug_reg_ = false;
void *op_debug_addr_ = nullptr;
void *p2p_debug_addr_ = nullptr;
bool is_online_infer_dynamic_ = false;
bool is_getnext_sink_dynamic_ = false;
vector<int32_t> cur_dynamic_dims_;

@ -85,6 +85,10 @@ Status HybridModelAsyncExecutor::Stop() {
ret = future_.get();
}
if (is_op_debug_reg_) {
op_debug_register_.UnregisterDebugForStream(stream_);
}
if (stream_ != nullptr) {
GE_CHK_RT(rtStreamDestroy(stream_));
stream_ = nullptr;
@ -101,6 +105,7 @@ Status HybridModelAsyncExecutor::Init() {
executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
GE_CHECK_NOTNULL(executor_);
GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
GE_CHK_STATUS_RET(DumpOpDebug(),"Dump op debug failed in hybrid engine");
GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups());
if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) {
@ -508,5 +513,40 @@ Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<
return SUCCESS;
}
Status HybridModelAsyncExecutor::DumpOpDebug() {
const DumpProperties &dump_properties = executor_->GetContext()->dump_properties;
if (dump_properties.IsOpDebugOpen()) {
GELOGD("Opdebug is open in hybrid engine");
uint32_t op_debug_mode = dump_properties.GetOpDebugMode();
GE_CHK_STATUS_RET(op_debug_register_.RegisterDebugForStream(stream_, op_debug_mode, data_dumper_), "Register debug for stream failed");
is_op_debug_reg_ = true;
data_dumper_.SetDumpProperties(dump_properties);
data_dumper_.SetModelName(model_->GetModelName());
data_dumper_.SetModelId(model_->GetModelId());
data_dumper_.SetDeviceId(model_->GetDeviceId());
void *global_step = nullptr;
TensorValue *variable_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP);
if (variable_global_step != nullptr) {
global_step = const_cast<void *>(variable_global_step->GetData());
}
void *loop_per_iter = nullptr;
TensorValue *variable_loop_per_iter = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
if (variable_loop_per_iter != nullptr) {
loop_per_iter = const_cast<void *>(variable_loop_per_iter->GetData());
}
void *loop_cond = nullptr;
TensorValue *variable_loop_cond = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_COND);
if (variable_loop_cond != nullptr) {
loop_cond = const_cast<void *>(variable_loop_cond->GetData());
}
}
data_dumper_.SetLoopAddr(global_step, loop_per_iter, loop_cond);
GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "LoadDumpInfo failed in hybrid engine");
GELOGD("Dump op debug SUCCESS in hybrid engine");
}
return SUCCESS;
}
} // namespace hybrid
} // namespace ge
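
DumpOpDebug above resolves three optional flow-control variables (NODE_NAME_GLOBAL_STEP, NODE_NAME_FLOWCTRL_LOOP_PER_ITER, NODE_NAME_FLOWCTRL_LOOP_COND) and hands the dumper a null address for any that the model does not contain. A standalone sketch of that null-safe lookup; the `std::map` stands in for `HybridModel::GetVariable` and `TensorValue` is reduced to a data pointer:

```cpp
#include <cstdio>
#include <map>
#include <string>

// TensorValue reduced to its data pointer.
struct TensorValue {
  const void *data = nullptr;
  const void *GetData() const { return data; }
};

// Stand-in for HybridModel::GetVariable: nullptr when the variable is absent.
const TensorValue *GetVariable(const std::map<std::string, TensorValue> &vars,
                               const std::string &name) {
  auto it = vars.find(name);
  return it == vars.end() ? nullptr : &it->second;
}

int main() {
  int step_storage = 0;
  std::map<std::string, TensorValue> vars{{"global_step", {&step_storage}}};

  void *global_step = nullptr;
  if (const TensorValue *v = GetVariable(vars, "global_step")) {
    global_step = const_cast<void *>(v->GetData());
  }
  void *loop_cond = nullptr;  // "loop_cond" is absent, so this stays null
  if (const TensorValue *v = GetVariable(vars, "loop_cond")) {
    loop_cond = const_cast<void *>(v->GetData());
  }
  std::printf("global_step=%p loop_cond=%p\n", global_step, loop_cond);
  return 0;
}
```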

@ -21,7 +21,9 @@
#include <future>
#include "external/ge/ge_api_error_codes.h"
#include "external/ge/ge_api_types.h"
#include "common/dump/opdebug_register.h"
#include "graph/load/model_manager/data_inputer.h"
#include "graph/load/model_manager/data_dumper.h"
#include "hybrid/executor/hybrid_model_executor.h"
#include "hybrid/executor/hybrid_model_pipeline_executor.h"
#include "runtime/stream.h"
@ -77,6 +79,8 @@ class HybridModelAsyncExecutor {
Status PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args);
Status DumpOpDebug();
std::mutex mu_;
HybridModel *model_;
uint32_t device_id_ = 0U;
@ -94,6 +98,9 @@ class HybridModelAsyncExecutor {
std::vector<bool> is_input_dynamic_;
std::shared_ptr<ModelListener> listener_;
string om_name_;
DataDumper data_dumper_;
bool is_op_debug_reg_ = false;
OpdebugRegister op_debug_register_;
};
} // namespace hybrid
} // namespace ge

@ -266,9 +266,9 @@ Status NodeDoneCallback::OnNodeDone() {
RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Compute] End");
RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Callback] Start");
- auto dump_path = context_->GetDumpProperties().GetDumpPath();
- if (!dump_path.empty()) {
- GELOGI("Start to dump dynamic shape,dump_path is %s", dump_path.c_str());
+ const DumpProperties &dump_properties = context_->GetDumpProperties();
+ if (dump_properties.IsDumpOpen() || context_->IsOverFlow()) {
+ GELOGI("Start to dump dynamic shape op");
GELOGI("Start to dump dynamic shape op");
GE_CHK_STATUS_RET(DumpDynamicNode(), "Failed to dump dynamic node");
}

@ -61,6 +61,10 @@ class HybridModel {
device_id_ = device_id;
}
uint32_t GetDeviceId() {
return device_id_;
}
void SetModelId(uint32_t model_id) {
model_id_ = model_id;
}

@ -17,6 +17,7 @@
#include "aicore_node_executor.h"
#include "framework/common/taskdown_common.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "external/runtime/rt_error_codes.h"
namespace ge {
namespace hybrid {
@ -189,6 +190,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
}
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context));
// save profiling data
uint32_t task_id = 0;
uint32_t stream_id = 0;
@ -259,6 +261,25 @@ void AiCoreNodeTask::SetWorkspaceSizes(const vector<int64_t> &workspace_sizes) {
workspace_sizes_ = workspace_sizes;
}
Status AiCoreNodeTask::CheckOverflow(TaskContext &context) {
const DumpProperties &dump_properties = context.GetDumpProperties();
if (dump_properties.IsOpDebugOpen()) {
GELOGD("Op %s is doing overflow check in hybrid engine", context.GetNodeName());
auto rt_ret = rtStreamSynchronize(context.GetStream());
if (rt_ret == ACL_ERROR_RT_AICORE_OVER_FLOW) {
context.SetOverFlow(true);
GELOGW("Dynamic shape op %s is over flow", context.GetNodeName());
return SUCCESS;
} else if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtstreamsynchronize failed");
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
return SUCCESS;
}
GELOGD("Opdebug is not open in hybrid engine");
return SUCCESS;
}
TaskCompilerFactory &TaskCompilerFactory::GetInstance() {
static TaskCompilerFactory instance;
return instance;
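
CheckOverflow above is the dynamic-shape half of the feature: after each kernel launch it synchronizes the stream, treats the dedicated AI Core overflow code as flag-and-continue (the actual dump fires later in NodeDoneCallback::OnNodeDone via context.IsOverFlow()), and still propagates every other runtime error. A standalone sketch of that three-way classification; the numeric value below is made up, the real constant is ACL_ERROR_RT_AICORE_OVER_FLOW from rt_error_codes.h:

```cpp
#include <cstdio>

// Illustrative codes only; the real ones come from rt_error_codes.h.
enum RtError { RT_OK = 0, RT_AICORE_OVERFLOW = 507018, RT_OTHER = 1 };

struct Context {
  bool over_flow = false;  // mirrors TaskContext::SetOverFlow/IsOverFlow
};

int CheckOverflow(Context &ctx, RtError sync_result) {
  if (sync_result == RT_AICORE_OVERFLOW) {
    ctx.over_flow = true;  // remember it; the dump fires in OnNodeDone
    std::puts("op overflowed, continuing");
    return 0;              // SUCCESS: overflow is not a launch failure
  }
  if (sync_result != RT_OK) {
    return sync_result;    // real runtime errors still propagate
  }
  return 0;
}

int main() {
  Context ctx;
  CheckOverflow(ctx, RT_AICORE_OVERFLOW);
  std::printf("over_flow=%d\n", static_cast<int>(ctx.over_flow));
  return 0;
}
```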

@ -62,6 +62,7 @@ class AiCoreNodeTask : public NodeTask {
const vector<int64_t> &GetWorkspaceSizes() const;
void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes);
private:
Status CheckOverflow(TaskContext &context);
std::vector<std::unique_ptr<AiCoreOpTask>> tasks_;
std::vector<int64_t> workspace_sizes_;
};

@ -124,7 +124,7 @@ Status KnownNodeTask::Init(TaskContext &context) {
}
if (!load_flag_) {
auto dump_properties = context.GetDumpProperties();
- if (dump_properties.IsDumpOpen()) {
+ if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
davinci_model_->SetDumpProperties(dump_properties);
void *global_step = nullptr;
TensorValue *variable_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP);

@ -350,6 +350,14 @@ void TaskContext::SetStreamId(uint32_t stream_id) {
stream_id_ = stream_id;
}
void TaskContext::SetOverFlow(bool is_over_flow) {
is_over_flow_ = is_over_flow;
}
bool TaskContext::IsOverFlow() {
return is_over_flow_;
}
Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) {
GE_CHECK_NOTNULL(buffer);
if (ori_addr == nullptr) {

@ -65,6 +65,7 @@ class TaskContext {
int64_t GetSessionId() const;
uint64_t GetIterationNumber() const;
void NodeDone();
void OnError(Status error);
@ -106,6 +107,9 @@ class TaskContext {
uint32_t GetStreamId() const;
void SetStreamId(uint32_t stream_id);
void SetOverFlow(bool is_over_flow);
bool IsOverFlow();
Status Synchronize();
bool IsForceInferShape() const;
@ -138,6 +142,7 @@ class TaskContext {
uint32_t task_id_ = 0;
uint32_t stream_id_ = 0;
std::vector<TaskDescInfo> task_desc_info;
bool is_over_flow_ = false;
};
} // namespace hybrid
} // namespace ge

@ -491,21 +491,18 @@ Status AiCpuBaseTask::UpdateOutputShape(vector<GeTensorDesc> &output_desc) {
}
GELOGD("Start to update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape.");
- GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(),
- aicpu_ext_handle_->GetExtInfoLen(),
- ext_info_addr_dev_,
- aicpu_ext_handle_->GetExtInfoLen(),
- RT_MEMCPY_DEVICE_TO_HOST));
+ GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), ext_info_addr_dev_,
+ aicpu_ext_handle_->GetExtInfoLen(), RT_MEMCPY_DEVICE_TO_HOST));
for (size_t i = 0; i < num_outputs_; ++i) {
GeShape shape;
DataType data_type;
aicpu_ext_handle_->GetOutputShapeAndType(i, shape, data_type);
- GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]),
- "AiCpuCCTask Update [%zu]th output shape failed.", i);
+ GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), "AiCpuCCTask Update [%zu]th output shape failed.",
+ i);
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
- GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]),
- "AiCpuCCTask Update [%zu]th output desc failed.", i);
+ GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuCCTask Update [%zu]th output desc failed.",
+ i);
}
}
GELOGD("Update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape finished.");
@ -697,10 +694,10 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) {
const auto &shape_hbm = out_shape_hbm_[i];
uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t);
- std::unique_ptr<int64_t[]> shape_addr(new(std::nothrow) int64_t[dim_num]());
+ std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]());
GE_CHECK_NOTNULL(shape_addr);
- GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size,
- shape_hbm, result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST));
+ GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm,
+ result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST));
for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) {
shape_dims.emplace_back(shape_addr[dim_idx]);
@ -711,13 +708,14 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) {
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]),
"AiCpuTask update [%zu]th output shape failed.", i);
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
- GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]),
- "AiCpuTask update [%zu]th output desc failed.", i);
+ GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuTask update [%zu]th output desc failed.",
+ i);
}
}
return SUCCESS;
}
Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc,
vector<DataBuffer> &outputs,
rtStream_t stream) {

@ -431,3 +431,7 @@ rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId)
{
return RT_ERROR_NONE;
}
rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr, uint32_t *streamId, uint32_t *taskId) {
return RT_ERROR_NONE;
}

@ -162,6 +162,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/common/dump/dump_properties.cc"
"${GE_CODE_DIR}/ge/common/helper/model_helper.cc"
"${GE_CODE_DIR}/ge/common/dump/dump_manager.cc"
"${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc"
"${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
"${GE_CODE_DIR}/ge/model/ge_root_model.cc"
"${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc"
@ -734,6 +735,7 @@ set(MULTI_PARTS_TEST_FILES
"graph/transop_util_unittest.cc"
"common/datatype_transfer_unittest.cc"
"common/dump_manager_unittest.cc"
"common/opdebug_register_unittest.cc"
"common/format_transfer_unittest.cc"
"common/format_transfer_transpose_unittest.cc"
"common/format_transfer_nchw_5d_unittest.cc"

@ -0,0 +1,51 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include "common/dump/opdebug_register.h"
#include "common/debug/log.h"
#include "common/ge_inner_error_codes.h"
namespace ge {
class UTEST_opdebug_register : public testing::Test {
protected:
void SetUp() {}
void TearDown() {}
};
TEST_F(UTEST_opdebug_register, register_debug_for_model_success) {
OpdebugRegister opdebug_register;
rtModel_t model_handle = (void*)0x111;
uint32_t op_debug_mode = 1;
DataDumper data_dumper;
auto ret = opdebug_register.RegisterDebugForModel(model_handle, op_debug_mode, data_dumper);
opdebug_register.UnregisterDebugForModel(model_handle);
EXPECT_EQ(ret, ge::SUCCESS);
}
TEST_F(UTEST_opdebug_register, register_debug_for_stream_success) {
OpdebugRegister opdebug_register;
rtStream_t stream = (void*)0x111;
uint32_t op_debug_mode = 1;
DataDumper data_dumper;
auto ret = opdebug_register.RegisterDebugForStream(stream, op_debug_mode, data_dumper);
opdebug_register.UnregisterDebugForStream(stream);
EXPECT_EQ(ret, ge::SUCCESS);
}
} // namespace ge