support error_manager used in train

pull/1159/head
wangxiaotian22 4 years ago
parent ef0380f1c2
commit 179b0e21bd

@ -76,9 +76,7 @@ if (ENABLE_OPEN_SRC)
find_module(runtime libruntime.so ${GE_LIB_PATH})
find_module(runtime_compile libruntime_compile.so ${GE_LIB_PATH})
find_module(resource libresource.so ${GE_LIB_PATH})
find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH})
#find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
@ -86,11 +84,9 @@ if (ENABLE_OPEN_SRC)
else()
find_module(slog libalog.so ${ASCEND_ATC_DIR})
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
if(PLATFORM STREQUAL "train")
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
if(PRODUCT STREQUAL "flr3")
@ -100,8 +96,6 @@ if (ENABLE_OPEN_SRC)
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
if(PRODUCT STREQUAL "flr3")
elseif(PRODUCT STREQUAL "flr1")
@ -114,11 +108,9 @@ if (ENABLE_OPEN_SRC)
elseif(PLATFORM STREQUAL "all")
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR})
else()
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!")
@ -144,7 +136,6 @@ elseif (ENABLE_D OR ENABLE_ACL)
# common libraries
find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
if (ENABLE_D)
@ -164,7 +155,6 @@ elseif(ENABLE_MS_TESTCASES)
# common libraries
find_module(slog libalog.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef)

@ -32,6 +32,7 @@
#include "graph/common/ge_call_wrapper.h"
#include "register/op_registry.h"
#include "common/ge/tbe_plugin_manager.h"
#include "common/util/error_manager/error_manager.h"
#include "toolchain/plog.h"
using domi::OpRegistry;
@ -79,6 +80,8 @@ Status CheckOptionsValid(const std::map<string, string> &options) {
// Initialize GE, prepare for execution, call GELib::Initialize
Status GEInitializeImpl(const std::map<string, string> &options) {
GELOGT(TRACE_INIT, "GEInitialize start");
ErrorManager::GetInstance().GenWorkStreamIdDefault();
// 0.check init status
if (g_ge_initialized) {
GELOGW("GEInitialize is called more than once");
@ -157,6 +160,8 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
// GE finalize, releasing all resources
Status GEFinalize() {
GELOGT(TRACE_INIT, "GEFinalize start");
ErrorManager::GetInstance().GenWorkStreamIdDefault();
// check init status
if (!g_ge_initialized) {
GELOGW("GEFinalize is called before GEInitialize");
@ -202,9 +207,19 @@ Status GEFinalize() {
return ret;
}
// Fetch the error message text from ErrorManager and hand it back to the caller by value.
std::string GEGetErrorMsg() {
  auto &&error_manager = ErrorManager::GetInstance();
  return error_manager.GetErrorMessage();
}
// Fetch the warning message text from ErrorManager and hand it back to the caller by value.
std::string GEGetWarningMsg() {
  // GetInstance must be called (as every other use in this file does); naming it
  // without "()" and then applying "." is ill-formed and fails to compile.
  return ErrorManager::GetInstance().GetWarningMessage();
}
// Initialize session, which calls innerSession
Session::Session(const std::map<string, string> &options) {
GELOGT(TRACE_INIT, "Session Constructor start");
ErrorManager::GetInstance().GenWorkStreamIdDefault();
// check init status
sessionId_ = 0;
if (!g_ge_initialized) {
@ -235,6 +250,8 @@ Session::Session(const std::map<string, string> &options) {
Session::Session(const std::map<AscendString, AscendString> &options) {
GELOGT(TRACE_INIT, "Session Constructor start");
ErrorManager::GetInstance().GenWorkStreamIdDefault();
// check init status
sessionId_ = 0;
if (!g_ge_initialized) {
@ -311,11 +328,13 @@ Session::~Session() {
// Convenience overload: adds the graph using an empty string-keyed option map.
// Generates the work stream id for this session/graph pair via ErrorManager,
// then forwards to the three-argument AddGraph overload and returns its status.
Status Session::AddGraph(uint32_t graph_id, const Graph &graph) {
  ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
  // Keep the std::string-keyed map type so the same overload is selected.
  const std::map<std::string, std::string> empty_options{};
  return AddGraph(graph_id, graph, empty_options);
}
Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) {
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@ -334,6 +353,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<s
Status Session::AddGraph(uint32_t graph_id, const Graph &graph,
const std::map<AscendString, AscendString> &options) {
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@ -360,6 +380,7 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph,
}
// Convenience overload: calls the three-argument AddGraphWithCopy with an empty
// AscendString option map after generating the work stream id for this
// session/graph pair via ErrorManager.
Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) {
  ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
  const std::map<AscendString, AscendString> empty_options{};
  return AddGraphWithCopy(graph_id, graph, empty_options);
}
@ -367,6 +388,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) {
Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph,
const std::map<AscendString, AscendString> &options) {
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
@ -389,6 +411,7 @@ Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph,
Status Session::RemoveGraph(uint32_t graph_id) {
GELOGT(TRACE_INIT, "Session RemoveGraph start");
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
// call RemoveGraph
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (!instance_ptr || !instance_ptr->InitFlag()) {
@ -457,6 +480,7 @@ void PrintOutputResult(std::vector<Tensor> &outputs) {
Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs) {
GELOGT(TRACE_INIT, "Session RunGraph start");
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::vector<Tensor> graph_inputs = inputs;
// call RunGraph
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
@ -483,10 +507,12 @@ Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, s
}
// Registers `callback` under `key` for this session by delegating to the
// session manager object owned by GELib; the delegate's status is returned as-is.
Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback) {
  ErrorManager::GetInstance().GenWorkStreamIdDefault();
  // NOTE(review): unlike sibling Session methods, the GELib instance is used here
  // without a nullptr / InitFlag() check -- confirm callers guarantee GE is initialized.
  auto ge_lib = ge::GELib::GetInstance();
  return ge_lib->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback);
}
Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFunc &callback) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
std::string str_key;
if (key != nullptr) {
str_key = key;
@ -495,6 +521,7 @@ Status Session::RegisterCallBackFunc(const char *key, const session::pCallBackFu
}
Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) {
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@ -511,6 +538,7 @@ Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo>
Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs,
RunAsyncCallback callback) {
ErrorManager::GetInstance().GenWorkStreamIdBySessionGraph(sessionId_, graph_id);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@ -529,6 +557,7 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorIn
}
Status Session::GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
auto instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
@ -544,6 +573,7 @@ Status Session::GetVariables(const std::vector<std::string> &var_names, std::vec
}
Status Session::GetVariables(const std::vector<AscendString> &var_names, std::vector<Tensor> &var_values) {
ErrorManager::GetInstance().GenWorkStreamIdDefault();
auto instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");

@ -2641,6 +2641,7 @@ void *DavinciModel::Run(DavinciModel *model) {
bool seq_end_flag = false;
uint32_t model_id = model->Id();
uint32_t device_id = model->GetDeviceId();
GetContext().SetWorkStreamId(model->GetWorkStreamId());
GELOGI("Model Run thread start, model_id:%u.", model_id);
rtError_t rt_ret = rtSetDevice(static_cast<int32_t>(device_id));
@ -2807,6 +2808,7 @@ Status DavinciModel::ModelRunStart() {
int64_t maxDumpOpNum = std::strtol(opt.c_str(), nullptr, kDecimal);
maxDumpOpNum_ = maxDumpOpNum;
work_stream_id_ = GetContext().WorkStreamId();
CREATE_STD_THREAD(thread_id_, DavinciModel::Run, this);
GELOGI("model tread create success, model id:%u.", model_id_);
return SUCCESS;

@ -412,6 +412,8 @@ class DavinciModel {
///
uint64_t GetSessionId() const { return session_id_; }
///
/// @ingroup ge
/// @brief Get the work stream id stored in this model
/// @return value of work_stream_id_
///
uint64_t GetWorkStreamId() const {
  return work_stream_id_;
}
///
/// @ingroup ge
/// @brief SetDeviceId
@ -960,6 +962,7 @@ class DavinciModel {
vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task.
uint64_t session_id_;
uint64_t work_stream_id_;
uint32_t device_id_;

@ -541,7 +541,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
compute_graph->GetGraphID(), subgraph,
compute_graph->GetName(), session_id,
compute_graph->GetName(), session_id, GetContext().WorkStreamId(),
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@ -557,7 +557,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
compute_graph->GetGraphID(), subgraph,
compute_graph->GetName(), session_id,
compute_graph->GetName(), session_id, GetContext().WorkStreamId(),
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@ -2508,8 +2508,10 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager
const SubGraphInfoPtr &sub_graph_info_ptr,
const std::string &root_graph_name,
uint64_t session_id,
uint64_t work_stream_id,
const GEThreadLocalContext &ge_context) {
if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) {
GetContext().SetWorkStreamId(work_stream_id);
GetContext().SetSessionId(session_id);
GetThreadLocalContext() = ge_context;
graph_manager->UpdateLocalOmgContext(root_graph_id);
@ -2643,6 +2645,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {
GELOGI("A new loop start.");
GetContext().SetWorkStreamId(args.work_stream_id);
GetContext().SetSessionId(args.session_id);
GetThreadLocalContext() = args.context;
graph_manager->UpdateLocalOmgContext(args.graph_id);
@ -2724,8 +2727,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {
ge_root_model = graph_node->GetGeRootModel();
}
graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.input_tensor,
ge_root_model, GetThreadLocalContext(), args.callback }));
graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.work_stream_id,
args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback }));
GELOGI("Loop end.");
}
}
@ -2824,6 +2827,7 @@ void GraphManager::RunThread(GraphManager *graph_manager) {
GELOGI("A new loop start.");
GetContext().SetWorkStreamId(args.work_stream_id);
GetContext().SetSessionId(args.session_id);
GetThreadLocalContext() = args.context;
graph_manager->UpdateLocalOmgContext(args.graph_id);

@ -196,6 +196,7 @@ class GraphManager {
GraphId graph_id;
std::vector<ge::InputTensorInfo> input_tensor;
uint64_t session_id;
uint64_t work_stream_id;
GEThreadLocalContext context;
RunAsyncCallback callback;
};
@ -204,6 +205,7 @@ class GraphManager {
GraphNodePtr graph_node;
GraphId graph_id;
uint64_t session_id;
uint64_t work_stream_id;
std::vector<ge::InputTensorInfo> input_tensor;
GeRootModelPtr ge_root_model;
GEThreadLocalContext context;
@ -221,6 +223,7 @@ class GraphManager {
const SubGraphInfoPtr &sub_graph_info_ptr,
const std::string &root_graph_name,
uint64_t session_id,
// Work stream id of the submitting thread; the definition passes it to
// GetContext().SetWorkStreamId(). Parameters are comma-separated: the
// original ';' here terminated the parameter list prematurely.
uint64_t work_stream_id,
const GEThreadLocalContext &ge_context);
Status ParseInputsDims(const std::vector<InputTensorInfo> &input_tensor);
void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor);

@ -1325,6 +1325,7 @@ int init(int argc, char* argv[]) {
return ret;
}
ErrorManager::GetInstance().GenWorkStreamIdDefault();
return 0;
}

@ -42,6 +42,10 @@ GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString
// Finalize GE, release all resources
GE_FUNC_VISIBILITY Status GEFinalize();
// Get the error message text provided by ErrorManager
GE_FUNC_VISIBILITY std::string GEGetErrorMsg();
// Get the warning message text provided by ErrorManager
GE_FUNC_VISIBILITY std::string GEGetWarningMsg();
class GE_FUNC_VISIBILITY Session {
public:
ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &))

@ -1 +1 @@
Subproject commit b6de68fdf0f131fd5f8aa3a84245ad7779b348f5
Subproject commit f982caa0981b1fdcc55a8ec27b4f4de9c58d33ba

@ -1 +1 @@
Subproject commit 7a6311351f8294eb11033b10e9f7b2b993cc3c2a
Subproject commit d2fc958450f7bd243eff8432aadeb9fa95fa2f61
Loading…
Cancel
Save