From 0b666e41b1b448701d0ca86dc51b08928c68240f Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 22 Nov 2020 23:22:24 +0800 Subject: [PATCH 1/9] For pkg3 acllib dynamic link. --- .../ops_kernel_builder_manager.cc | 20 +++++++++++-------- .../ops_kernel_builder_manager.h | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.cc b/ge/opskernel_manager/ops_kernel_builder_manager.cc index e0001fcd..167be47b 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.cc +++ b/ge/opskernel_manager/ops_kernel_builder_manager.cc @@ -33,6 +33,8 @@ const std::vector kHcclBuilderLibs = { "libhvd_opskernel_builder.so", "libhcom_gradtune_opskernel_builder.so" }; + +const std::string kAicoreUtilsLib = "libaicore_utils_runtime.so"; } // namespace OpsKernelBuilderManager::~OpsKernelBuilderManager() { // it's OK to call Finalize multiply times @@ -45,13 +47,11 @@ OpsKernelBuilderManager &OpsKernelBuilderManager::Instance() { } Status OpsKernelBuilderManager::Initialize(const map &options, bool is_train) { - if (is_train) { - std::string lib_paths; - GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths)); - plugin_manager_.reset(new (std::nothrow)PluginManager()); - GE_CHECK_NOTNULL(plugin_manager_); - GE_CHK_STATUS_RET(plugin_manager_->LoadSo(lib_paths), "Failed to load libs"); - } + std::string lib_paths; + GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths, is_train)); + plugin_manager_.reset(new (std::nothrow)PluginManager()); + GE_CHECK_NOTNULL(plugin_manager_); + GE_CHK_STATUS_RET(plugin_manager_->LoadSo(lib_paths), "Failed to load libs"); auto &kernel_builders = OpsKernelBuilderRegistry::GetInstance().GetAll(); GELOGI("Number of OpBuild = %zu", kernel_builders.size()); @@ -100,7 +100,8 @@ OpsKernelBuilderPtr OpsKernelBuilderManager::GetOpsKernelBuilder(const string &n return nullptr; } -Status OpsKernelBuilderManager::GetLibPaths(const std::map &options, std::string &lib_paths) { +Status OpsKernelBuilderManager::GetLibPaths(const std::map &options, std::string &lib_paths, + bool is_train) { GELOGD("Start to execute GetLibPaths"); std::string path_base = PluginManager::GetPath(); std::string so_path = "plugin/opskernel/"; @@ -109,6 +110,9 @@ Status OpsKernelBuilderManager::GetLibPaths(const std::mapsecond != "0") { diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.h b/ge/opskernel_manager/ops_kernel_builder_manager.h index 7a95ddfa..207ebc79 100644 --- a/ge/opskernel_manager/ops_kernel_builder_manager.h +++ b/ge/opskernel_manager/ops_kernel_builder_manager.h @@ -48,7 +48,7 @@ class OpsKernelBuilderManager { private: OpsKernelBuilderManager() = default; - static Status GetLibPaths(const std::map &options, std::string &lib_paths); + static Status GetLibPaths(const std::map &options, std::string &lib_paths, bool is_train); std::unique_ptr plugin_manager_; std::map ops_kernel_builders_{}; From 7fe250695305473b22a4b652a70f81ec3101c98d Mon Sep 17 00:00:00 2001 From: wangzhengjun Date: Mon, 7 Dec 2020 15:28:01 +0800 Subject: [PATCH 2/9] modify for static check 2 --- ge/graph/load/new_model_manager/data_dumper.cc | 4 ++-- .../new_model_manager/task_info/stream_switch_task_info.h | 4 ++-- ge/graph/load/new_model_manager/task_info/task_info.h | 4 ++-- ge/hybrid/executor/hybrid_model_executor.cc | 2 +- ge/hybrid/hybrid_davinci_model.cc | 4 ++-- ge/hybrid/model/hybrid_model_builder.cc | 2 +- ge/hybrid/node_executor/controlop/control_op_executor.cc | 2 +- inc/framework/common/taskdown_common.h | 2 -- 8 files changed, 11 insertions(+), 13 deletions(-) diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc index 4534fe73..b331d780 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/new_model_manager/data_dumper.cc @@ -919,11 +919,11 @@ Status DataDumper::DumpExceptionInfo(const std::vector exceptio ReplaceStringElem(op_name); ReplaceStringElem(op_type); string dump_file_path = - "./" + op_type + "." + op_name + "." + to_string(op_desc_info.task_id) + "." + to_string(now_time); + "./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." + std::to_string(now_time); GELOGI("The exception dump file path is %s", dump_file_path.c_str()); uint64_t proto_size = dump_data.ByteSizeLong(); - unique_ptr proto_msg(new (std::nothrow) char[proto_size]); + std::unique_ptr proto_msg(new (std::nothrow) char[proto_size]); bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size); if (!ret || proto_size == 0) { GELOGE(PARAM_INVALID, "Dump data proto serialize failed"); diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h index 89642cf8..a72d7de2 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h @@ -41,7 +41,7 @@ class StreamSwitchTaskInfo : public TaskInfo { Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; private: - void SetInputAndValuePtr(DavinciModel *davinci_model, const vector &input_data_addrs); + void SetInputAndValuePtr(DavinciModel *davinci_model, const std::vector &input_data_addrs); void *input_ptr_; rtCondition_t cond_; void *value_ptr_; @@ -49,7 +49,7 @@ class StreamSwitchTaskInfo : public TaskInfo { uint32_t true_stream_id_; rtSwitchDataType_t data_type_; static const uint32_t kInputNum = 2; - vector fixed_addr_offset_; + std::vector fixed_addr_offset_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/new_model_manager/task_info/task_info.h index fe9c8c37..26f22564 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info.h +++ b/ge/graph/load/new_model_manager/task_info/task_info.h @@ -63,8 +63,8 @@ struct RuntimeParam { }; typedef struct FusionOpInfo { - vector original_op_names; - string op_name; + std::vector original_op_names; + std::string op_name; uint32_t op_index; uint32_t stream_id; } FusionOpInfo; diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 4af34451..8ba687c2 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -82,7 +82,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, Status HybridModelExecutor::Cleanup() { GELOGD("Start to cleanup."); context_.callback_manager->Destroy(); - RuntimeInferenceContext::DestroyContext(to_string(context_.session_id)); + RuntimeInferenceContext::DestroyContext(std::to_string(context_.session_id)); GELOGD("Cleanup successfully."); return SUCCESS; } diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc index d696adf9..b6f5bb84 100755 --- a/ge/hybrid/hybrid_davinci_model.cc +++ b/ge/hybrid/hybrid_davinci_model.cc @@ -78,8 +78,8 @@ HybridDavinciModel::~HybridDavinciModel() { delete impl_; } -unique_ptr HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { - auto instance = unique_ptr(new (std::nothrow)HybridDavinciModel()); +std::unique_ptr HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { + auto instance = std::unique_ptr(new (std::nothrow)HybridDavinciModel()); if (instance != nullptr) { instance->impl_ = new (std::nothrow) HybridDavinciModel::Impl(ge_root_model); if (instance->impl_ != nullptr) { diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index f9564a8f..cd4c0a83 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -957,7 +957,7 @@ Status HybridModelBuilder::IndexTaskDefs() { // index task defs GELOGD("To index tasks for subgraph: %s", name.c_str()); - unordered_map node_map; + std::unordered_map node_map; for (const auto &node : sub_graph->GetDirectNode()) { GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.cc b/ge/hybrid/node_executor/controlop/control_op_executor.cc index 83fc09ee..74920b22 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.cc +++ b/ge/hybrid/node_executor/controlop/control_op_executor.cc @@ -405,7 +405,7 @@ Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, auto node_item = model.GetNodeItem(node); GE_CHECK_NOTNULL(node_item); - unique_ptr node_task; + std::unique_ptr node_task; auto node_type = node->GetType(); if (node_type == IF || node_type == STATELESSIF) { node_task.reset(new(std::nothrow) IfOpNodeTask()); diff --git a/inc/framework/common/taskdown_common.h b/inc/framework/common/taskdown_common.h index b1364d16..12c6af89 100644 --- a/inc/framework/common/taskdown_common.h +++ b/inc/framework/common/taskdown_common.h @@ -19,8 +19,6 @@ #include "runtime/rt.h" -using namespace std; - namespace ge { #define CC_FUSION_OP_MAX 32 From 2c24f922ffc2c8ad27ccebf38028ac1c4dd7957f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=8A=A2?= Date: Mon, 7 Dec 2020 15:30:22 +0800 Subject: [PATCH 3/9] cpplint cast fix --- ge/graph/load/new_model_manager/davinci_model.cc | 2 +- .../task_info/super_kernel/super_kernel.cc | 9 +++++---- .../task_info/super_kernel/super_kernel_factory.cc | 12 ++++++------ ge/graph/load/new_model_manager/zero_copy_task.cc | 2 +- ge/omm/csa_interact.cc | 2 +- ge/session/omg.cc | 6 +++--- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 81d47b3b..1a4a5014 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2801,7 +2801,7 @@ void *DavinciModel::Run(DavinciModel *model) { reinterpret_cast(shape_data_buffer_data) + shape_data_buffer_length / sizeof(int64_t)); GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); - delete[] (int64_t *)current_data.blobs.back().data; + delete[] reinterpret_cast(current_data.blobs.back().data); current_data.blobs.pop_back(); } GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc index 63f29f84..e94fa425 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc @@ -25,10 +25,11 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { const void *args[] = {this->GetNavTablePtr(), reinterpret_cast(static_cast(this->GetNavTableSize()))}; - rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return - RT_ERROR_TO_GE_STATUS(rt_ret);) - rt_ret = rtMemcpy((void *)device_args_addr_, sizeof(args), (void *)args, sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); + rtError_t rt_ret = rtMalloc(reinterpret_cast(device_args_addr_), sizeof(args), RT_MEMORY_HBM); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) + rt_ret = rtMemcpy(reinterpret_cast(device_args_addr_), sizeof(args), (void *)args, sizeof(args), + RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc index 69f7b159..39373901 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc @@ -87,7 +87,7 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list } GELOGI("SKT: superkernel start fuse, superkernel size %zu.", stub_func_list.size()); const size_t nav_table_len = 2 * stub_func_list.size(); - std::unique_ptr nav_table(new(std::nothrow) uint64_t[nav_table_len]); + std::unique_ptr nav_table(new (std::nothrow) uint64_t[nav_table_len]); GE_CHECK_NOTNULL(nav_table); uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t); @@ -106,16 +106,16 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list nav_table[i * 2 + 1] = static_cast(reinterpret_cast(args_addr_list[i])); GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); } - rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM); + rt_ret = rtMalloc(reinterpret_cast(&hbm_nav_table_addr), nav_table_size, RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) - rt_ret = - rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table.get(), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); + rt_ret = rtMemcpy(reinterpret_cast(hbm_nav_table_addr), nav_table_size, + reinterpret_cast(nav_table.get()), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret); GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) // Create the necessary metadata for the super kernel - h = std::unique_ptr( - new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); + h = + std::unique_ptr(new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); return SUCCESS; } } // namespace skt diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc index 9b42d563..2609cb4b 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -131,7 +131,7 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const ma auto dst_addr = static_cast(buffer_addr); GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p", name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr); - *(uintptr_t *)(args_info + offset) = reinterpret_cast(dst_addr); + *reinterpret_cast(args_info + offset)= reinterpret_cast(dst_addr); is_updated_ = true; } } diff --git a/ge/omm/csa_interact.cc b/ge/omm/csa_interact.cc index 1599af94..1b33ddbd 100644 --- a/ge/omm/csa_interact.cc +++ b/ge/omm/csa_interact.cc @@ -202,7 +202,7 @@ Status CsaInteract::WriteFile(const std::string &file_name, const std::string &c } } - mmSsize_t ret = mmWrite(fd, (void *)content.c_str(), content.length()); + mmSsize_t ret = mmWrite(fd, reinterpret_cast(const_cast(content.c_str())), content.length()); if (ret == EN_ERROR) { GELOGE(INTERNAL_ERROR, "write file fail, errno is %d", errno); ret = mmClose(fd); diff --git a/ge/session/omg.cc b/ge/session/omg.cc index df837f99..b5e1e105 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -891,7 +891,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con if (status != ge::GRAPH_SUCCESS) { GELOGE(ge::FAILED, "Om file init failed."); if (model.model_data != nullptr) { - delete[](char *) model.model_data; + delete[] reinterpret_cast(model.model_data); model.model_data = nullptr; } return status; @@ -902,7 +902,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con if (status != ge::GRAPH_SUCCESS) { GELOGE(ge::FAILED, "Get model part failed."); if (model.model_data != nullptr) { - delete[](char *) model.model_data; + delete[] reinterpret_cast(model.model_data); model.model_data = nullptr; } return status; @@ -928,7 +928,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con } if (model.model_data != nullptr) { - delete[](char *) model.model_data; + delete[] reinterpret_cast(model.model_data); model.model_data = nullptr; } return ret; From 65b310205a075c72c238bd444a1396b97fbc8211 Mon Sep 17 00:00:00 2001 From: taoxudonghaha Date: Mon, 7 Dec 2020 15:33:28 +0800 Subject: [PATCH 4/9] clean bc warning and add atc.bin fwk_atc.bin --- ge/offline/CMakeLists.txt | 126 +++- ge/offline/atc | 20 + ge/offline/module.mk | 105 ++++ ge/stub/gen_stubapi.py | 7 + metadef | 2 +- parser | 2 +- tests/st/CMakeLists.txt | 42 -- tests/st/resnet50/common.cc | 768 ------------------------- tests/st/resnet50/common.h | 102 ---- tests/st/resnet50/ptest.h | 225 -------- tests/st/resnet50/resnet50_train.cc | 852 ---------------------------- tests/st/test_ge_st.py | 56 -- 12 files changed, 257 insertions(+), 2050 deletions(-) create mode 100644 ge/offline/atc delete mode 100644 tests/st/CMakeLists.txt delete mode 100644 tests/st/resnet50/common.cc delete mode 100644 tests/st/resnet50/common.h delete mode 100644 tests/st/resnet50/ptest.h delete mode 100644 tests/st/resnet50/resnet50_train.cc delete mode 100644 tests/st/test_ge_st.py diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index 49af37c0..b3a0d53c 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -11,13 +11,13 @@ set(SRC_LIST "main.cc" "single_op_parser.cc" "../session/omg.cc" - "../ir_build/atc_ir_common.cc" + "../ir_build/atc_ir_common.cc" ) ############ atc ############ add_executable(atc ${SRC_LIST} ${PROTO_HDRS}) -target_compile_options(atc PRIVATE +target_compile_options(atc PRIVATE -Werror -O2 -Wno-deprecated-declarations @@ -74,10 +74,130 @@ target_link_libraries(atc PRIVATE -ldl ) +############ atc.bin ############ +add_executable(atc.bin ${SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(atc.bin PRIVATE + -Werror + -O2 + -Wno-deprecated-declarations +) + +target_compile_definitions(atc.bin PRIVATE + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + COMPILE_OMG_PACKAGE + google=ascend_private +) + +target_include_directories(atc.bin PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/common/inc/external + ${GE_CODE_DIR}/common/inc/external/graph + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/graph + ${METADEF_DIR}/inc/register + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/external/register + ${PARSER_DIR} + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/common + #### blue zone #### + ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain +) + +target_link_libraries(atc.bin PRIVATE + $ + ascend_protobuf + ge_common + register + c_sec + graph + error_manager + ge_compiler + parser_common + gflags + json + runtime_compile + slog + static_mmpa + -lrt + -ldl +) + +############ fwk_atc.bin ############ +add_executable(fwk_atc.bin ${SRC_LIST} ${PROTO_HDRS}) + +target_compile_options(fwk_atc.bin PRIVATE + -Werror + -O2 + -Wno-deprecated-declarations +) + +target_compile_definitions(fwk_atc.bin PRIVATE + PROTOBUF_INLINE_NOT_IN_HEADERS=0 + COMPILE_OMG_PACKAGE + google=ascend_private +) + +target_include_directories(fwk_atc.bin PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc/external + ${GE_CODE_DIR}/common/inc/external + ${GE_CODE_DIR}/common/inc/external/graph + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/framework + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/graph + ${METADEF_DIR}/inc/register + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${METADEF_DIR}/inc/external/register + ${PARSER_DIR} + ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/proto/ge + #### yellow zone #### + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/common + #### blue zone #### + ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain +) + +target_link_libraries(fwk_atc.bin PRIVATE + $ + ascend_protobuf + ge_common + register + c_sec + graph + error_manager + ge_compiler + parser_common + gflags + json + runtime_compile + slog + static_mmpa + -lrt + -ldl +) + ############ install ############ set(INSTALL_BASE_DIR "") set(INSTALL_LIBRARY_DIR lib) -install(TARGETS atc OPTIONAL +install(TARGETS atc atc.bin fwk_atc.bin OPTIONAL LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} ) diff --git a/ge/offline/atc b/ge/offline/atc new file mode 100644 index 00000000..a2b96482 --- /dev/null +++ b/ge/offline/atc @@ -0,0 +1,20 @@ +#!/bin/bash +#------------------------------------------------------------------- +# Purpose: +# Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved. +#------------------------------------------------------------------- + +LOCAL_PATH=$(cd "$(dirname "$0")"; pwd) +PKG_PATH=$(cd ${LOCAL_PATH}/..; pwd) +LIB_P="/lib64" +PYTHON_P="/python/site-packages" +LIB64_PATH="${PKG_PATH}${LIB_P}" +PYTHON_PATH="${PKG_PATH}${PYTHON_P}" +export LD_LIBRARY_PATH="${LIB64_PATH}:${LD_LIBRARY_PATH}" +export PYTHONPATH="${PYTHON_PATH}:${PYTHONPATH}" + +if [ -f "${PKG_PATH}/bin/atc.bin" ];then + atc.bin $@ +else + fwk_atc.bin $@ +fi diff --git a/ge/offline/module.mk b/ge/offline/module.mk index 8859df29..c14be50f 100755 --- a/ge/offline/module.mk +++ b/ge/offline/module.mk @@ -54,3 +54,108 @@ LOCAL_LDFLAGS := -lrt -ldl include $(BUILD_HOST_EXECUTABLE) +include $(CLEAR_VARS) + +LOCAL_MODULE := atc.bin + +LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private + +LOCAL_SRC_FILES := \ + main.cc \ + single_op_parser.cc \ + ../session/omg.cc \ + ../ir_build/atc_ir_common.cc \ + +LOCAL_C_INCLUDES := \ + $(LOCAL_PATH)/../ ./ \ + $(TOPDIR)inc \ + $(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)metadef/inc/external \ + $(TOPDIR)graphengine/inc/external \ + $(TOPDIR)metadef/inc/external/graph \ + $(TOPDIR)graphengine/inc/framework \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)metadef/inc/common/util \ + $(TOPDIR)parser \ + third_party/json/include \ + third_party/gflags/include \ + third_party/protobuf/include \ + proto/om.proto \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/insert_op.proto \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libge_common \ + libascend_protobuf \ + libslog \ + libgraph \ + libregister \ + liberror_manager \ + libge_compiler \ + libruntime_compile \ + libparser_common \ + liberror_manager \ + +LOCAL_STATIC_LIBRARIES := libgflags + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_HOST_EXECUTABLE) + +include $(CLEAR_VARS) + +LOCAL_MODULE := fwk_atc.bin + +LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private + +LOCAL_SRC_FILES := \ + main.cc \ + single_op_parser.cc \ + ../session/omg.cc \ + ../ir_build/atc_ir_common.cc \ + +LOCAL_C_INCLUDES := \ + $(LOCAL_PATH)/../ ./ \ + $(TOPDIR)inc \ + $(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)metadef/inc/external \ + $(TOPDIR)graphengine/inc/external \ + $(TOPDIR)metadef/inc/external/graph \ + $(TOPDIR)graphengine/inc/framework \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)metadef/inc/common/util \ + $(TOPDIR)parser \ + third_party/json/include \ + third_party/gflags/include \ + third_party/protobuf/include \ + proto/om.proto \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/insert_op.proto \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libge_common \ + libascend_protobuf \ + libslog \ + libgraph \ + libregister \ + liberror_manager \ + libge_compiler \ + libruntime_compile \ + libparser_common \ + liberror_manager \ + +LOCAL_STATIC_LIBRARIES := libgflags + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_HOST_EXECUTABLE) diff --git a/ge/stub/gen_stubapi.py b/ge/stub/gen_stubapi.py index f2a6a287..d19b44a6 100644 --- a/ge/stub/gen_stubapi.py +++ b/ge/stub/gen_stubapi.py @@ -1,3 +1,10 @@ +#!/usr/bin/python3.7 +# -*- coding: UTF-8 -*- +#------------------------------------------------------------------- +# Purpose: +# Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved. +#------------------------------------------------------------------- + import os import re import sys diff --git a/metadef b/metadef index 29c31bb8..5b9a7f84 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 29c31bb87d8bbe6904ab6fa72034a803fb50a746 +Subproject commit 5b9a7f84a4347f8816d492aa51f2414ccf8a0744 diff --git a/parser b/parser index ba956d34..70369668 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit ba956d349d8ad3e864d27467f4f0119333cbadc6 +Subproject commit 70369668abebed84942d9f355494a89e82cc1eac diff --git a/tests/st/CMakeLists.txt b/tests/st/CMakeLists.txt deleted file mode 100644 index 56babec1..00000000 --- a/tests/st/CMakeLists.txt +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -cmake_minimum_required(VERSION 3.0) -set(CMAKE_CXX_STANDARD 11) -project(ge_st CXX C) - -set(CMAKE_CXX_FLAGS "-O1 -fPIC -Wl,-unresolved-symbols=ignore-in-shared-libs") - - -file(GLOB_RECURSE RES50_TRAIN_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - "resnet50/resnet50_train.cc" - "resnet50/common.cc" -) - -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/graph) -include_directories(${GE_SOURCE_DIR}/inc/framework) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/inc/external/ge) -include_directories(${GE_SOURCE_DIR}/inc/external/graph) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/ops) -include_directories(/usr/local/HiAI/opp/op_proto/built-in/inc) - -add_executable(st_resnet50_train ${RES50_TRAIN_SRCS}) -target_link_libraries(st_resnet50_train - ${PROTOBUF_LIBRARY} - ge_client_train ge_memory -) \ No newline at end of file diff --git a/tests/st/resnet50/common.cc b/tests/st/resnet50/common.cc deleted file mode 100644 index 674ef926..00000000 --- a/tests/st/resnet50/common.cc +++ /dev/null @@ -1,768 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include - -#include "common.h" -#include "model.h" - -#define MAX_HEAD_SIZE 50 - -using namespace std; -using namespace ge; - -void update_op_format(Operator ops, Format format) { - printf("set format begin.........\n"); - ge::TensorDesc tensor_desc_x = ops.GetInputDesc("x"); - ge::TensorDesc tensor_desc_y = ops.GetOutputDesc("y"); - Format f_x0 = tensor_desc_x.GetFormat(); - Format f_y0 = tensor_desc_x.GetFormat(); - printf("before set x format:%d \n", f_x0); - printf("before set y format:%d \n", f_y0); - printf("format to be set is :%d \n", format); - tensor_desc_x.SetFormat(format); - tensor_desc_y.SetFormat(format); - ops.UpdateInputDesc("x", tensor_desc_x); - ops.UpdateOutputDesc("y", tensor_desc_y); - Format f_x = tensor_desc_x.GetFormat(); - Format f_y = tensor_desc_y.GetFormat(); - printf("after set x format:%d \n", f_x); - printf("after set y format:%d \n", f_y); -} - -/// getDimInfo: get dim info from data file -/// param: -/// fp: the testing datafile object -/// -/// return : -/// dim_info: array to store the info of the dim in datafile, like [4,3,3,6,3,162(3*3*6*3)],4 is dim size,3,3,6,3 is the -/// dim shape data_size: the size of the testing data including the data file -void getDimInfo(FILE *fp, std::vector &dim_info) { - // get dim info from hisi testing data file - uint32_t *dim_buffer = (uint32_t *)malloc(MAX_HEAD_SIZE * sizeof(uint32_t)); - fread(dim_buffer, sizeof(uint32_t), MAX_HEAD_SIZE, fp); - dim_info.push_back(*dim_buffer); // get dim size - - // get data shape to compute the datasize - uint64_t data_size = 1; - uint32_t i = 1; - for (; i <= dim_info[0]; i++) { - dim_info.push_back(*(dim_buffer + i)); - data_size *= *(dim_buffer + i); - } - dim_info.push_back(data_size); - - free(dim_buffer); -} - -/// readTestDataFile: read test date from hisi .t datafile -/// param: -/// infile: the path of hisi .t datafile -/// return: -/// dim_info: array to store the info of the dim in datafile, like [4,3,3,6,3],4 is dim size,3,3,6,3 is the dim shape -void *readTestDataFile(std::string infile, std::vector &dim_info) { - FILE *fp; - fp = fopen(infile.c_str(), "r"); - - if (fp == NULL) { - printf("ERROR: cant't open file %s\n", infile.c_str()); - return NULL; - } else { - getDimInfo(fp, dim_info); - uint64_t data_size = dim_info[dim_info.size() - 1]; - - fclose(fp); - - fp = fopen(infile.c_str(), "r"); - if (fp == NULL) { - printf("ERROR: cant't open file %s\n", infile.c_str()); - return NULL; - } - uint32_t *memory = (uint32_t *)malloc((dim_info[0] + 1 + data_size) * sizeof(uint32_t)); - fread(memory, sizeof(uint32_t), (dim_info[0] + 1 + data_size), fp); - fclose(fp); - return memory + (dim_info[0] + 1); - } -} - -void *readUint8TestDataFile(std::string infile, int size) { - FILE *fp; - fp = fopen(infile.c_str(), "r"); - - if (fp == NULL) { - printf("ERROR: cant't open file %s\n", infile.c_str()); - return NULL; - } - uint8_t *memory = (uint8_t *)malloc((size) * sizeof(uint8_t)); - fread(memory, sizeof(uint8_t), (size), fp); - fclose(fp); - return memory; -} - -/// allclose -/// param: -/// a:compared file a -/// b:compared file b -/// count: the count size which will compare -/// rtol: -/// atol: -/// return: -/// true or false -bool allclose(float *a, float *b, uint64_t count, float rtol = 1e-05, float atol = 1e-08) { - uint32_t i = 0; - - for (; i < count; ++i) { - if (fabs(a[i] - b[i]) > (atol + rtol * fabs(b[i]))) { - printf("compara failed: i= %d, a[i]=%f, b[i]=%f,atol=%f,rtol=%f\n", i, a[i], b[i], atol, rtol); - return false; - } - } - - return true; -} - -/// compFp32WithTData: compare the data with the data in hisi .t file -/// param: -/// actual_output_data: the result of ge -/// expected_data_file: the path of hisi .t result file -/// rtol: -/// atol: -/// return: -/// true of false -bool compFp32WithTData(float *actual_output_data, std::string expected_data_file, float rtol = 1e-05, float atol = 1e-08) { - std::vector dim_info; - float *expected_output_data = (float *)readTestDataFile(expected_data_file, dim_info); - - uint32_t i = 1; - uint64_t data_size = 1; - for (; i <= dim_info[0]; i++) { - data_size *= dim_info[i]; - } - return allclose(actual_output_data, expected_output_data, data_size, rtol, atol); -} - -int SwitchDatatype(DataType dt) { - int size = 1; - if (dt == ge::DT_FLOAT) size = 4; - if (dt == ge::DT_INT32) size = 4; - if (dt == ge::DT_FLOAT16) size = 2; - if (dt == ge::DT_INT64) size = 8; - return size; -} - -ge::Tensor genTensor(std::vector tensor_shape, Format format, DataType dt) { - int size = 1; - for (int i = 0; i < tensor_shape.size(); i++) { - size = size * tensor_shape[i]; - } - - int data_type_size = SwitchDatatype(dt); - - size = abs(size * data_type_size); - vector data_value; - - if (size == 0) { - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), format, dt); - input_tensor_desc.SetRealDimCnt(tensor_shape.size()); - Tensor gen_tensor = Tensor(input_tensor_desc, data_value); - return gen_tensor; - } - for (int i = 0; i < size; i++) { - data_value.push_back(1); - } - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), format, dt); - input_tensor_desc.SetRealDimCnt(tensor_shape.size()); - Tensor gen_tensor = Tensor(input_tensor_desc, data_value); - return gen_tensor; -} - -ge::Tensor genTensor_withVaule(std::vector tensor_shape, float value) { - int size = 1; - for (int i = 0; i < tensor_shape.size(); i++) { - size = size * tensor_shape[i]; - } - - float *data_value = new float[size]; - for (int i = 0; i < size; i++) { - *(data_value + i) = value; - } - Tensor gen_ge_tensor; - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), FORMAT_NCHW); - gen_ge_tensor.SetTensorDesc(input_tensor_desc); - gen_ge_tensor.SetData((uint8_t *)data_value, size * 4); - - return gen_ge_tensor; -} - -Tensor genTesnor_Shape_as_data(std::vector tensor_shape) { - Format format = FORMAT_NCHW; - DataType dt = DT_INT32; - int size = tensor_shape.size(); - int32_t *tensor_data = new int32_t[size]; - std::cout << "shape tensor size:" << size << endl; - for (int i = 0; i < size; i++) { - *(tensor_data + i) = tensor_shape[i]; - } - - Tensor gen_tensor; - TensorDesc input_tensor_desc = TensorDesc(ge::Shape({size}), FORMAT_NCHW, DT_INT32); - gen_tensor.SetData((uint8_t *)tensor_data, size * GetDatTypeSize(dt)); - gen_tensor.SetTensorDesc(input_tensor_desc); - - return gen_tensor; -} - -/// train_flag is 0 when infer; train_flag is 1 when train; train_flag is 0 default -/// run_mode_path is not 0,1,2 when TBE; run_mode_path is 1 when FE; run_mode_path is 0 default -/// run_mode_path is 2 now when AICPU, ge.enabledlocalFmkop is 1 -ge::Status GEInitialize_api(string train_flag, string run_mode_path) { - ge::Status ret; - if (run_mode_path == "0") { - const std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {"ge.graphRunMode", train_flag}, - {"ge.aicpuFlag", "1"}, - {"ge.feFlag", "1"}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {"ge.soLoadPath", - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libaicpu_plugin.so"}}; - ret = ge::GEInitialize(config); - } else if (run_mode_path == "1") { - const std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {"ge.graphRunMode", train_flag}, - {"ge.feFlag", "1"}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/bert"}, - {"ge.soLoadPath", "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so"}}; - ret = ge::GEInitialize(config); - } else if (run_mode_path == "2") { - const std::map config = {{"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {"ge.graphRunMode", train_flag}, - {LOCAL_FMKOP_FLAG, "1"}}; - ret = ge::GEInitialize(config); - } else { - const std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {"ge.graphRunMode", train_flag}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/" + run_mode_path}}; - ret = ge::GEInitialize(config); - } - std::cout << "GEInitialize_ret is " << ret << std::endl; - - return ret; -} - -/// train_flag is infer default -/// run_mode: is multi group of [fe,aicpu,bert,deeplabv3,mobilenetv2,single_path_nas,ssd] -/// but bert,deeplabv3,mobilenetv2,single_path_nas,ssd can only set one value from array -/// eg:"fe,aicpu,bert" or "fe", default is “fe” -/// "fe,aicpu,bert" remain open fe aicpu and bert -ge::Status GEInitialize_api_new(string train_flag, string run_mode) { - ge::Status ret; - vector modes; - - char *strs = new char[run_mode.length() + 1]; - strcpy(strs, run_mode.c_str()); - const char *delim = ","; - char *p = strtok(strs, delim); - while (p) { - string s = p; // transform substr to string - modes.push_back(s); // save to result array - p = strtok(NULL, delim); - } - - std::map config = { - {"device_id", "0,2,4,6"}, - {"rank_table_file", "hccl from csa/paas"}, - {DDK_VERSION_FLAG, "1.60.T17.B830"}, - {"ge.opsProtoLibPath", "/usr/local/HiAI/runtime/ops/op_proto/built-in/libopsproto.so"}}; - if (train_flag == "infer") - config.insert(pair("ge.graphRunMode", "0")); - else if (train_flag == "train") - config.insert(pair("ge.graphRunMode", "1")); - else - std::cout << "GeInitialize give the error param" << std::endl; - - for (int i = 0; i < modes.size(); i++) { - if (modes[i] == "fe") { - config.insert(pair("ge.feFlag", "1")); - if (config.find("ge.soLoadPath") != config.end()) { - config["ge.soLoadPath"] = - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/" - "runtime/lib64/plugin/opskernel/librts_engine.so"; - } else { - config.insert(pair( - "ge.soLoadPath", - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so")); - } - } else if (modes[i] == "aicpu") { - config.insert(pair("ge.aicpuFlag", "1")); - if (config.find("ge.soLoadPath") != config.end()) { - config["ge.soLoadPath"] = - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" - "libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/" - "runtime/lib64/plugin/opskernel/librts_engine.so"; - } else { - config.insert(pair( - "ge.soLoadPath", - "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/" - "opskernel/libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so")); - } - } else if (modes[i] == "bert" || modes[i] == "deeplabv3" || modes[i] == "mobilenetv2" || - modes[i] == "single_path_nas" || modes[i] == "ssd") { - config.insert(pair(TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/" + modes[i])); - } else if (modes[i] == "plugin") { - - } else - std::cout << "GeInitialize give the error param" << std::endl; - } - ret = ge::GEInitialize(config); - - std::cout << "GEInitialize_ret is " << ret << std::endl; - - return ret; -} - -ge::Status GEFinalize_api() { - ge::Status ret = ge::GEFinalize(); - std::cout << "GEFinalize ret is " << ret << std::endl; - - return ret; -} - -/// set train_flag -/// if run_mode_path is "fe" remain FE process; "fe,plugin" is FE and TBE plugin process -/// "aicpu" is open aicpu plugin -int RunGraph_initData(Graph &graph, string op_name, map> attr_test, string train_flag, - string run_mode_path) { - std::map options = {{RUN_FLAG, "1"}}; - uint32_t graph_id = 0; - - ge::Status ret = GEInitialize_api_new(train_flag, run_mode_path); - EXPECT_EQ(ret, ge::SUCCESS); - - ge::Session *session = new Session(options); - ASSERT_TRUE(session != NULL); - - std::vector input; - if (attr_test.find("input1") != attr_test.end()) { - Tensor input_tensor = genTensor(attr_test["input1"]); - input.push_back(input_tensor); - } - if (attr_test.find("input2") != attr_test.end()) { - Tensor input_tensor = genTensor(attr_test["input2"]); - input.push_back(input_tensor); - } - if (attr_test.find("input3") != attr_test.end()) { - Tensor input_tensor = genTensor(attr_test["input3"]); - input.push_back(input_tensor); - } - std::vector output; - - ret = session->AddGraph(graph_id, graph); - EXPECT_EQ(ret, ge::SUCCESS); - if (train_flag == "1") { - setenv("GE_TRAIN", "1", true); - ret = session->RunGraph(graph_id, input, output); - setenv("GE_TRAIN", "0", true); - } else { - ret = session->RunGraph(graph_id, input, output); - } - delete session; - GEFinalize_api(); - - if (ret != ge::SUCCESS) { - std::cout << " run graph failed" << std::endl; - return -1; - } else { - return 0; - } -} - -ge::Status session_add_and_run_graph(ge::Session *session, uint32_t graph_id, Graph &graph, std::vector inputs, - std::vector &outputs) { - ge::Status ret = session->AddGraph(graph_id, graph); - EXPECT_EQ(ret, ge::SUCCESS); - ret = session->RunGraph(graph_id, inputs, outputs); - - return ret; -} - -ge::Session *create_session() { - // Init session - std::map options = {{"a", "b"}, {TRAIN_FLAG, "1"}}; - ge::Session *session = new Session(options); - ASSERT_TRUE(session != NULL); - - return session; -} - -ge::Session *create_aipp_session() { - // Init session - std::map options = {{"a", "b"}, {TRAIN_FLAG, "1"}, {"ge.insertOpFile", "/root/host/ge/aipp.cfg"}}; - ge::Session *session = new Session(options); - ASSERT_TRUE(session != NULL); - - return session; -} - -int buildCheckPointGraph(Graph &graph, map variables) { - std::vector inputs{}; - std::vector outputs{}; - - for (map::iterator it = variables.begin(); it != variables.end(); ++it) { - auto var = op::Variable(string(it->first)); - var.update_output_desc_y(it->second); - inputs.push_back(var); - graph.AddOp(var); - } - - auto save = op::Save().create_dynamic_input_tensors(inputs.size()); - for (int i = 0; i < inputs.size(); i++) { - save.set_dynamic_input_tensors(i, inputs[i]); - } - - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} - -int buildInitGraph(Graph &graph, std::vector desc_var, std::vector name_var, - std::vector values_var) { - std::vector inputs{}; - std::vector outputs{}; - - for (int i = 0; i < desc_var.size(); i++) { - desc_var[i].SetRealDimCnt(desc_var[i].GetShape().GetDimNum()); - auto tensor_data = genTensor_withVaule(desc_var[i].GetShape().GetDims(), values_var[i]); - auto var_constant = op::Constant().set_attr_value(tensor_data); - var_constant.update_output_desc_y(desc_var[i]); - - auto var_init = op::Variable(string(name_var[i])); - var_init.update_output_desc_y(desc_var[i]); - auto var_assign = op::Assign().set_input_ref(var_init).set_input_value(var_constant); - inputs.push_back(var_init); - } - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} - -int buildInitGraph_other_dataType(Graph &graph, std::vector desc_var, std::vector name_var) { - std::vector inputs{}; - std::vector outputs{}; - - for (int i = 0; i < desc_var.size(); i++) { - desc_var[i].SetRealDimCnt(desc_var[i].GetShape().GetDimNum()); - auto tensor_data = genTensor(desc_var[i].GetShape().GetDims(), desc_var[i].GetFormat(), desc_var[i].GetDataType()); - auto var_constant = op::Constant().set_attr_value(tensor_data); - var_constant.update_output_desc_y(desc_var[i]); - - auto var_init = op::Variable(string(name_var[i])); - var_init.update_output_desc_y(desc_var[i]); - auto var_assign = op::Assign().set_input_ref(var_init).set_input_value(var_constant); - inputs.push_back(var_init); - - graph.AddOp(var_constant); - graph.AddOp(var_init); - graph.AddOp(var_assign); - } - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} - -bool build_multi_input_multi_output_graph(Graph &graph) { - auto data1 = op::Data("Data1").set_attr_index(0); - auto data2 = op::Data("Data2").set_attr_index(1); - - vector dim_info; - - auto relu1 = op::Relu("Relu1").set_input_x(data1); - auto relu2 = op::Relu("Relu2").set_input_x(data2); - - auto eltwise = op::Eltwise("Eltwise") - .create_dynamic_input_x(2) - .set_dynamic_input_x(0, relu1) - .set_dynamic_input_x(1, relu2) - .set_attr_N(2) - .set_attr_mode(1) - .set_attr_coeff({1, 1}); - - auto eltwise1 = op::Eltwise("Eltwise1") - .create_dynamic_input_x(2) - .set_dynamic_input_x(0, eltwise) - .set_dynamic_input_x(1, eltwise) - .set_attr_N(2) - .set_attr_mode(1) - .set_attr_coeff({1, 1}); - - auto eltwise2 = op::Eltwise("Eltwise2") - .create_dynamic_input_x(2) - .set_dynamic_input_x(0, eltwise) - .set_dynamic_input_x(1, eltwise) - .set_attr_N(2) - .set_attr_mode(1) - .set_attr_coeff({1, 1}); - - std::vector inputs{data1, data2}; - std::vector outputs{eltwise1, eltwise2}; - graph.SetInputs(inputs).SetOutputs(outputs); - return true; -} - -void build_big_graph(Graph &graph, map> attr) { - auto data = op::Data("Data").set_attr_index(0); - auto weight = op::Const("weight1").set_attr_value(genTensor(attr["weight"])); - vector weight_shape(attr["weight"].begin(), attr["weight"].end()); - TensorDesc weight_desc(ge::Shape(weight_shape), FORMAT_NCHW, DT_FLOAT); - weight.update_output_desc_y(weight_desc); - auto conv_1 = op::Conv2D("conv1").set_input_x(data).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - - auto conv_2 = op::Conv2D("conv2").set_input_x(conv_1).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_3 = op::Conv2D("conv3").set_input_x(conv_2).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_4 = op::Conv2D("conv4").set_input_x(conv_3).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_5 = op::Conv2D("conv5").set_input_x(conv_4).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_6 = op::Conv2D("conv6").set_input_x(conv_5).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_7 = op::Conv2D("conv7").set_input_x(conv_6).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_8 = op::Conv2D("conv8").set_input_x(conv_7).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_9 = op::Conv2D("conv9").set_input_x(conv_8).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_10 = op::Conv2D("conv10").set_input_x(conv_9).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_11 = op::Conv2D("conv11").set_input_x(conv_10).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_12 = op::Conv2D("conv12").set_input_x(conv_11).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_13 = op::Conv2D("conv13").set_input_x(conv_12).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_14 = op::Conv2D("conv14").set_input_x(conv_13).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_15 = op::Conv2D("conv15").set_input_x(conv_14).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_16 = op::Conv2D("conv16").set_input_x(conv_15).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_17 = op::Conv2D("conv17").set_input_x(conv_16).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_18 = op::Conv2D("conv18").set_input_x(conv_17).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_19 = op::Conv2D("conv19").set_input_x(conv_18).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_20 = op::Conv2D("conv20").set_input_x(conv_19).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_21 = op::Conv2D("conv21").set_input_x(conv_20).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_22 = op::Conv2D("conv22").set_input_x(conv_21).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_23 = op::Conv2D("conv23").set_input_x(conv_22).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_24 = op::Conv2D("conv24").set_input_x(conv_23).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_25 = op::Conv2D("conv25").set_input_x(conv_24).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_26 = op::Conv2D("conv26").set_input_x(conv_25).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_27 = op::Conv2D("conv27").set_input_x(conv_26).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_28 = op::Conv2D("conv28").set_input_x(conv_27).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_29 = op::Conv2D("conv29").set_input_x(conv_28).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_30 = op::Conv2D("conv30").set_input_x(conv_29).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_31 = op::Conv2D("conv31").set_input_x(conv_30).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_32 = op::Conv2D("conv32").set_input_x(conv_31).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_33 = op::Conv2D("conv33").set_input_x(conv_32).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_34 = op::Conv2D("conv34").set_input_x(conv_33).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_35 = op::Conv2D("conv35").set_input_x(conv_34).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_36 = op::Conv2D("conv36").set_input_x(conv_35).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_37 = op::Conv2D("conv37").set_input_x(conv_36).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_38 = op::Conv2D("conv38").set_input_x(conv_37).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_39 = op::Conv2D("conv39").set_input_x(conv_38).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_40 = op::Conv2D("conv40").set_input_x(conv_39).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_41 = op::Conv2D("conv41").set_input_x(conv_40).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_42 = op::Conv2D("conv42").set_input_x(conv_41).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_43 = op::Conv2D("conv43").set_input_x(conv_42).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_44 = op::Conv2D("conv44").set_input_x(conv_43).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_45 = op::Conv2D("conv45").set_input_x(conv_44).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_46 = op::Conv2D("conv46").set_input_x(conv_45).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_47 = op::Conv2D("conv47").set_input_x(conv_46).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_48 = op::Conv2D("conv48").set_input_x(conv_47).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_49 = op::Conv2D("conv49").set_input_x(conv_48).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_50 = op::Conv2D("conv50").set_input_x(conv_49).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_51 = op::Conv2D("conv51").set_input_x(conv_50).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_52 = op::Conv2D("conv52").set_input_x(conv_51).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_53 = op::Conv2D("conv53").set_input_x(conv_52).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_54 = op::Conv2D("conv54").set_input_x(conv_53).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_55 = op::Conv2D("conv55").set_input_x(conv_54).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_56 = op::Conv2D("conv56").set_input_x(conv_55).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_57 = op::Conv2D("conv57").set_input_x(conv_56).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_58 = op::Conv2D("conv58").set_input_x(conv_57).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_59 = op::Conv2D("conv59").set_input_x(conv_58).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_60 = op::Conv2D("conv60").set_input_x(conv_59).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_61 = op::Conv2D("conv61").set_input_x(conv_60).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_62 = op::Conv2D("conv62").set_input_x(conv_61).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_63 = op::Conv2D("conv63").set_input_x(conv_62).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_64 = op::Conv2D("conv64").set_input_x(conv_63).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_65 = op::Conv2D("conv65").set_input_x(conv_64).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_66 = op::Conv2D("conv66").set_input_x(conv_65).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_67 = op::Conv2D("conv67").set_input_x(conv_66).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_68 = op::Conv2D("conv68").set_input_x(conv_67).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_69 = op::Conv2D("conv69").set_input_x(conv_68).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_70 = op::Conv2D("conv70").set_input_x(conv_69).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_71 = op::Conv2D("conv71").set_input_x(conv_70).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_72 = op::Conv2D("conv72").set_input_x(conv_71).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_73 = op::Conv2D("conv73").set_input_x(conv_72).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_74 = op::Conv2D("conv74").set_input_x(conv_73).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_75 = op::Conv2D("conv75").set_input_x(conv_74).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_76 = op::Conv2D("conv76").set_input_x(conv_75).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_77 = op::Conv2D("conv77").set_input_x(conv_76).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_78 = op::Conv2D("conv78").set_input_x(conv_77).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_79 = op::Conv2D("conv79").set_input_x(conv_78).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_80 = op::Conv2D("conv80").set_input_x(conv_79).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_81 = op::Conv2D("conv81").set_input_x(conv_80).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_82 = op::Conv2D("conv82").set_input_x(conv_81).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_83 = op::Conv2D("conv83").set_input_x(conv_82).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_84 = op::Conv2D("conv84").set_input_x(conv_83).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_85 = op::Conv2D("conv85").set_input_x(conv_84).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_86 = op::Conv2D("conv86").set_input_x(conv_85).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_87 = op::Conv2D("conv87").set_input_x(conv_86).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_88 = op::Conv2D("conv88").set_input_x(conv_87).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_89 = op::Conv2D("conv89").set_input_x(conv_88).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_90 = op::Conv2D("conv90").set_input_x(conv_89).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_91 = op::Conv2D("conv91").set_input_x(conv_80).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_92 = op::Conv2D("conv92").set_input_x(conv_91).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_93 = op::Conv2D("conv93").set_input_x(conv_92).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_94 = op::Conv2D("conv94").set_input_x(conv_93).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_95 = op::Conv2D("conv95").set_input_x(conv_94).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_96 = op::Conv2D("conv96").set_input_x(conv_95).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_97 = op::Conv2D("conv97").set_input_x(conv_96).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_98 = op::Conv2D("conv98").set_input_x(conv_97).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_99 = op::Conv2D("conv99").set_input_x(conv_98).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_100 = op::Conv2D("conv100").set_input_x(conv_99).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_101 = op::Conv2D("conv101").set_input_x(conv_100).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_102 = op::Conv2D("conv102").set_input_x(conv_101).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_103 = op::Conv2D("conv103").set_input_x(conv_102).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_104 = op::Conv2D("conv104").set_input_x(conv_103).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_105 = op::Conv2D("conv105").set_input_x(conv_104).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_106 = op::Conv2D("conv106").set_input_x(conv_105).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_107 = op::Conv2D("conv107").set_input_x(conv_106).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_108 = op::Conv2D("conv108").set_input_x(conv_107).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_109 = op::Conv2D("conv109").set_input_x(conv_108).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_110 = op::Conv2D("conv110").set_input_x(conv_109).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_111 = op::Conv2D("conv111").set_input_x(conv_110).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_112 = op::Conv2D("conv112").set_input_x(conv_111).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_113 = op::Conv2D("conv113").set_input_x(conv_112).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_114 = op::Conv2D("conv114").set_input_x(conv_113).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_115 = op::Conv2D("conv115").set_input_x(conv_114).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_116 = op::Conv2D("conv116").set_input_x(conv_115).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_117 = op::Conv2D("conv117").set_input_x(conv_116).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_118 = op::Conv2D("conv118").set_input_x(conv_117).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_119 = op::Conv2D("conv119").set_input_x(conv_118).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_120 = op::Conv2D("conv120").set_input_x(conv_119).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_121 = op::Conv2D("conv121").set_input_x(conv_120).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_122 = op::Conv2D("conv122").set_input_x(conv_121).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_123 = op::Conv2D("conv123").set_input_x(conv_122).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_124 = op::Conv2D("conv124").set_input_x(conv_123).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_125 = op::Conv2D("conv125").set_input_x(conv_124).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_126 = op::Conv2D("conv126").set_input_x(conv_125).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_127 = op::Conv2D("conv127").set_input_x(conv_126).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_128 = op::Conv2D("conv128").set_input_x(conv_127).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_129 = op::Conv2D("conv129").set_input_x(conv_128).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - auto conv_130 = op::Conv2D("conv130").set_input_x(conv_129).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); - - std::vector inputs{data}; - std::vector outputs{conv_130}; - graph.SetInputs(inputs).SetOutputs(outputs); -} - -int GetDatTypeSize(DataType dt) { - int dailation = 1; - if (dt == ge::DT_FLOAT) - dailation = 4; - else if (dt == ge::DT_FLOAT16) - dailation = 2; - else if (dt == ge::DT_INT16) - dailation = 2; - else if (dt == ge::DT_UINT16) - dailation = 2; - else if (dt == ge::DT_INT32) - dailation = 4; - else if (dt == ge::DT_UINT32) - dailation = 4; - else if (dt == ge::DT_INT64) - dailation = 8; - else if (dt == ge::DT_UINT64) - dailation = 8; - else if (dt == ge::DT_INT8) - dailation = 1; - - return dailation; -} - -int buildConvGraph_new(Graph &graph, std::vector desc_var, std::vector name_var, int flag, - Format format) { - auto data_x_shape = op::Data("xShape").set_attr_index(0); - auto var = op::Variable(name_var[0]); - auto var1 = op::Variable(name_var[1]); //add one seat of ApplyMomentum() - auto label1 = op::Variable(name_var[2]); //add one seat of ApplyMomentum() - auto conv2dgrad = op::Conv2DBackpropFilterD("output_1"); - auto test2 = op::ApplyMomentum(); - - var.update_output_desc_y(desc_var[0]); - var1.update_output_desc_y(desc_var[1]); - label1.update_output_desc_y(desc_var[2]); - - graph.AddOp(var); - graph.AddOp(var1); - graph.AddOp(label1); - - auto conv2d = op::Conv2D().set_input_x(data_x_shape).set_input_filter(var).set_attr_strides({1, 1, 1, 1}).set_attr_pads({0,0,0,0}); - update_op_format(conv2d, format); - ge::TensorDesc tensor_desc_w = conv2d.GetInputDesc("filter"); - tensor_desc_w.SetFormat(format); - conv2d.UpdateInputDesc("filter", tensor_desc_w); - - if (flag >= 1) { - conv2dgrad.set_input_x(data_x_shape) - .set_attr_filter_size(desc_var[0].GetShape().GetDims()) - .set_input_out_backprop(conv2d) - .set_attr_strides({1, 1, 1, 1}) - .set_attr_pads({0, 0, 0, 0}); - update_op_format(conv2dgrad, format); - graph.AddOp(conv2dgrad); - } - if (flag >= 2) { - // set conv2dgrad var - test2.set_input_accum(var1) - .set_input_grad(conv2dgrad) - .set_input_lr(label1) - .set_input_momentum(label1) - .set_input_var(var); - graph.AddOp(test2); - } - - std::vector inputs{data_x_shape}; // set all val - std::vector outputs{conv2d}; - graph.SetInputs(inputs).SetOutputs(outputs); - graph.AddOp(conv2d); - - return 0; -} - -/// load bin data_fail -/// input_path: path of bin data_file -/// shapes: the shape of Tensor -/// ft: the format of Tensor -/// dt: the dataType of Tensor -Tensor load_variable_input_data(string input_path, std::vector shapes, Format ft, DataType dt) { - vector dim_info1; - - uint8_t *input_data = (uint8_t *)readTestDataFile(input_path, dim_info1); // common.h - TensorDesc input_tensor_desc = TensorDesc(ge::Shape(shapes), ft, dt); - input_tensor_desc.SetRealDimCnt(shapes.size()); - Tensor input_tensor = Tensor(input_tensor_desc, input_data, GetDatTypeSize(dt) * dim_info1[dim_info1[0] + 1]); - return input_tensor; -} diff --git a/tests/st/resnet50/common.h b/tests/st/resnet50/common.h deleted file mode 100644 index 75805db7..00000000 --- a/tests/st/resnet50/common.h +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ST_RESNET50_GE_COMMON_H_ -#define ST_RESNET50_GE_COMMON_H_ -#include "common/ge_inner_error_codes.h" -#include "utils/tensor_utils.h" - -#define MY_USER_GE_LOGI(...) GE_LOG_INFO(1, __VA_ARGS__) -#define MY_USER_GE_LOGW(...) GE_LOG_WARN(1, __VA_ARGS__) -#define MY_USER_GE_LOGE(...) GE_LOG_ERROR(1, 3, __VA_ARGS__) - -#ifndef USER_GE_LOGI -#define USER_GE_LOGI MY_USER_GE_LOGI -#endif // USER_GE_LOGI - -#ifndef USER_GE_LOGW -#define USER_GE_LOGW MY_USER_GE_LOGW -#endif // USER_GE_LOGW - -#ifndef USER_GE_LOGE -#define USER_GE_LOGE MY_USER_GE_LOGE -#endif // USER_GE_LOGE - -/// train_flag is 0 when infer, train_flag is 1 when train.this param is set for RunGranph_readData() and -/// RunGraph_initData() -#define TRAIN_FLAG_INFER "infer" -#define TRAIN_FLAG_TRAIN "train" - -#include -#include -#include -#include -#include -#include -#include - -#include "ge_api.h" -#include "graph.h" -#include "ptest.h" -#include "ops/all_ops.h" -using namespace std; -using namespace ge; - -// read bin file and compile result -void update_op_format(Operator ops, Format format = ge::FORMAT_NCHW); -void getDimInfo(FILE *fp, std::vector &dim_info); -void *readTestDataFile(std::string infile, std::vector &dim_info); -void *readUint8TestDataFile(std::string infile, int size); -bool allclose(float *a, float *b, uint64_t count, float rtol, float atol); -bool compFp32WithTData(float *actual_output_data, std::string expected_data_file, float rtol, float atol); -Tensor load_variable_input_data(string input_path, std::vector shapes, Format ft = ge::FORMAT_NCHW, - DataType dt = ge::DT_FLOAT); -// constructor Tensor -int GetDatTypeSize(DataType dt); -ge::Tensor genTensor(std::vector tensor_shape, Format format = ge::FORMAT_NCHW, DataType dt = ge::DT_FLOAT); -ge::Tensor genTensor_withVaule(std::vector tensor_shape, float value = 1); -Tensor genTesnor_Shape_as_data(std::vector tensor_shape); -// Init GE -ge::Status GEInitialize_api(string train_flag = "0", string run_mode_path = "0"); -ge::Status GEInitialize_api_new(string train_flag = "infer", string run_mode = "fe"); -ge::Status GEFinalize_api(); -// constructor session and build graph -ge::Session *create_aipp_session(); -ge::Session *create_session(); -ge::Status session_add_and_run_graph(ge::Session *session, uint32_t graphId, Graph &graph, std::vector inputs, - std::vector &outputs); - -// common interface for infer -int RunGraph_initData(Graph &graph, string op_name, map> attr_test, - string train_flag = "infer", string run_mode_path = "fe"); -void Inputs_load_Data(string op_name, std::vector &input, map> attr_test, - Format format = ge::FORMAT_NCHW, DataType dt = ge::DT_FLOAT); -bool comparaData(std::vector &output, string op_name, map> attr_test); -int RunGraph_readData(Graph &graph, string op_name, map> attr_test, - string train_flag = "infer", string run_mode_path = "fe", Format format = ge::FORMAT_NCHW, - DataType dt = ge::DT_FLOAT); - -// common interface for train -int buildCheckPointGraph(Graph &graph, map variables); -int buildInitGraph(Graph &graph, std::vector desc_var, std::vector name_var, - std::vector values_var); -int buildInitGraph_other_dataType(Graph &graph, std::vector desc_var, std::vector name_var); - -bool build_multi_input_multi_output_graph(Graph &graph); -void build_big_graph(Graph &graph, map> attr); -int buildConvGraph_new(Graph &graph, std::vector desc_var, std::vector name_var, int flag = 2); - -#endif // ST_RESNET50_GE_COMMON_H_ diff --git a/tests/st/resnet50/ptest.h b/tests/st/resnet50/ptest.h deleted file mode 100644 index 568969f8..00000000 --- a/tests/st/resnet50/ptest.h +++ /dev/null @@ -1,225 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ST_RESNET50_PTEST_H_ -#define ST_RESNET50_PTEST_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace ptest { -class assertion_error : public std::exception { - public: - const char *what() const throw() { return "Assertion Exception"; } -}; - -class TestFixture { - public: - virtual void SetUp() {} - virtual void TearDown() {} - void Run() { _func(); } - void BindFunction(std::function function) { _func = function; } - void SetName(const std::string &name) { _name = name; } - std::string Name() const { return _name; } - virtual ~TestFixture() {} - - private: - std::function _func; - std::string _name; -}; - -enum TestResult { SUCCESS, FAILED, UNAVAILABLE, UNKNOWN, NOCASEFOUND }; - -class TestManager { - public: - static TestManager &GetSingleton() { - static TestManager instance; - return instance; - } - void RegisterTest(const std::string &name, TestFixture *fixture) { _testfixtures[name] = fixture; } - - const std::string GetRunningTestcaseName() const { return _running_testcase_name; } - - const std::list GetAllTestNames() const { - std::list result; - for (auto &t : _testfixtures) { - result.push_back(t.first); - } - return result; - } - - TestResult RunTest(const std::string &name) { - if (_testfixtures.find(name) == _testfixtures.end()) { - return NOCASEFOUND; - } - - _running_testcase_name = name; - - do { - SetTestResult(name, UNKNOWN); - _testfixtures[name]->SetUp(); - if (_testresults[name] == FAILED) { - _testresults[name] = UNAVAILABLE; - break; - } - SetTestResult(name, SUCCESS); - try { - _testfixtures[name]->Run(); - } catch (assertion_error &e) { - // Do nothing as the error has been handled by the TestManager. - } - _testfixtures[name]->TearDown(); - } while (0); - - return _testresults[name]; - } - void SetTestResult(const std::string &name, TestResult result) { _testresults[name] = result; } - TestResult GetTestResult(const std::string &name) { return _testresults[name]; } - - private: - std::map _testfixtures; - std::map _testresults; - std::string _running_testcase_name; -}; - -class TestFixtureRegister { - public: - TestFixtureRegister(const std::string &name, TestFixture *fixture, std::function function) { - fixture->BindFunction(function); - fixture->SetName(name); - TestManager::GetSingleton().RegisterTest(name, fixture); - } -}; -} // namespace ptest - -#define _STR(x) #x -#define _EMPTY_NAMESPACE - -#define _TEST(NAMESPACE, FIXTURECLASS, TESTNAME, CASENAME) \ - void g_func_##TESTNAME##_##CASENAME(void); \ - NAMESPACE::FIXTURECLASS g_fixture_##TESTNAME##_##CASENAME; \ - ptest::TestFixtureRegister g_register_##TESTNAME##_##CASENAME( \ - _STR(TESTNAME##_##CASENAME), &g_fixture_##TESTNAME##_##CASENAME, g_func_##TESTNAME##_##CASENAME); \ - void g_func_##TESTNAME##_##CASENAME(void) - -#define TEST(TESTNAME, CASENAME) _TEST(ptest, TestFixture, TESTNAME, CASENAME) - -#define TEST_F(TESTFIXTURE, CASENAME) _TEST(_EMPTY_NAMESPACE, TESTFIXTURE, TESTFIXTURE, CASENAME) - -#define EXPECT_TRUE(X) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << "Expectation Failed\n" \ - << "Testcase Name: " << test_name << "\n" \ - << "File: " __FILE__ << "\tLine:" << __LINE__ << std::endl; \ - } \ - } while (0); - -// With the macro definition ensures that the compiler can detect compiler warning. -#define Max_Log_Len 1024 -#define PRINT_ERR(lpszFormat, ...) \ - do { \ - char szTmpBuf[Max_Log_Len + 1] = {0}; \ - snprintf(szTmpBuf, Max_Log_Len, lpszFormat, ##__VA_ARGS__); \ - std::cerr << szTmpBuf << std::endl; \ - } while (0) - -// Increase the content of print error messages and error to facilitate rapid analysis -#define EXPECT_TRUE_C(X, ERR_TYPE, format, ...) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << " Expectation Failed." \ - << "Testcase Name: " << test_name << " File:" __FILE__ << " Line:" << __LINE__ << std::endl; \ - PRINT_ERR("[" ERR_TYPE "]" format, ##__VA_ARGS__); \ - } \ - } while (0) - -#define ASSERT_TRUE(X) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << "Assertion Failed\n" \ - << "Testcase Name: " << test_name << "\n" \ - << "File: " __FILE__ << "\tLine:" << __LINE__ << std::endl; \ - throw ptest::assertion_error(); \ - } \ - } while (0); - -// Add printing error information and error line content for quick analysis -#define ASSERT_TRUE_C(X, ERR_TYPE, format, ...) \ - do { \ - if (!(X)) { \ - std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \ - ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \ - std::cerr << #X << " Assertion Failed." \ - << "Testcase Name: " << test_name << " File:" __FILE__ << " Line:" << __LINE__ << std::endl; \ - PRINT_ERR("[" ERR_TYPE "]" format, ##__VA_ARGS__); \ - throw ptest::assertion_error(); \ - } \ - } while (0); - -#define CONFIG_ERR "CONFIG_ERR" -#define LOAD_MODEL_ERR "LOAD_MODEL_ERR" -#define FILE_READ_ERR "FILE_READ_ERR" -#define RUN_ERROR "RUN_ERROR" -#define MEM_ERROR "MEM_ERROR" -#define RESULT_ERR "RESULT_ERR" - -#define EXPECT_FALSE(X) EXPECT_TRUE(!(X)) -#define EXPECT_EQ(X, Y) EXPECT_TRUE(((X) == (Y))) -#define EXPECT_NE(X, Y) EXPECT_TRUE(((X) != (Y))) -#define EXPECT_GT(X, Y) EXPECT_TRUE(((X) > (Y))) -#define EXPECT_GE(X, Y) EXPECT_TRUE(((X) >= (Y))) -#define EXPECT_LT(X, Y) EXPECT_TRUE(((X) < (Y))) -#define EXPECT_LE(X, Y) EXPECT_TRUE(((X) <= (Y))) - -#define EXPECT_FALSE_C(X, ERR_TYPE, format, ...) EXPECT_TRUE_C(!(X), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_EQ_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) == (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_NE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) != (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_GT_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) > (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_GE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) >= (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_LT_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) < (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define EXPECT_LE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) <= (Y)), ERR_TYPE, format, ##__VA_ARGS__) - -#define ASSERT_FALSE(X) ASSERT_TRUE(!(X)) -#define ASSERT_EQ(X, Y) ASSERT_TRUE(((X) == (Y))) -#define ASSERT_NE(X, Y) ASSERT_TRUE(((X) != (Y))) -#define ASSERT_GT(X, Y) ASSERT_TRUE(((X) > (Y))) -#define ASSERT_GE(X, Y) ASSERT_TRUE(((X) >= (Y))) -#define ASSERT_LT(X, Y) ASSERT_TRUE(((X) < (Y))) -#define ASSERT_LE(X, Y) ASSERT_TRUE(((X) <= (Y))) - -#define ASSERT_FALSE_C(X, ERR_TYPE, format, ...) ASSERT_TRUE_C(!(X), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_EQ_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) == (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_NE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) != (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_GT_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) > (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_GE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) >= (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_LT_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) < (Y)), ERR_TYPE, format, ##__VA_ARGS__) -#define ASSERT_LE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) <= (Y)), ERR_TYPE, format, ##__VA_ARGS__) - -#endif // ST_RESNET50_PTEST_H_ diff --git a/tests/st/resnet50/resnet50_train.cc b/tests/st/resnet50/resnet50_train.cc deleted file mode 100644 index f1d1e58d..00000000 --- a/tests/st/resnet50/resnet50_train.cc +++ /dev/null @@ -1,852 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "common.h" -#include "ge_api.h" -#include "graph.h" -#include "ops/all_ops.h" -#include "types.h" -#include "utils/tensor_utils.h" - -using namespace std; -using namespace ge; -using namespace op; - -typedef bool (*Func)(Graph &graph); - -#define PADDING_MODE 6 -#define GRAD_PADDING_MODE 3 -vector pad_1{1, 1, 1, 1}; -vector pad_0{0, 0, 0, 0}; -vector stride_1{1, 1}; -vector stride_2{2, 2}; - -// (int out_channels, int h, int w, vector stride{1,1}, vector pad{1,1,1,1}, op::Data() input) -#define GENERATE_CONV_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT); \ - auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight"); \ - LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_weight = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_weight"); \ - LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - LAYER##_##BLK##_##OPNUM##_mom_weight.update_input_desc_x(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) << "'s weight shape is:" << in_channels << out_channels << h \ - << w << endl; \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) \ - << "'s input_x op's shape is:" << input.GetOutputDesc("y").GetShape().GetDim(2) << endl; \ - auto LAYER##_##BLK##_##OPNUM##_tmp_dims = input.GetOutputDesc("y").GetShape().GetDims(); \ - for (auto LAYER##_##BLK##_##OPNUM##_tmp_it = LAYER##_##BLK##_##OPNUM##_tmp_dims.begin(); \ - LAYER##_##BLK##_##OPNUM##_tmp_it != LAYER##_##BLK##_##OPNUM##_tmp_dims.end(); \ - LAYER##_##BLK##_##OPNUM##_tmp_it++) { \ - cout << *LAYER##_##BLK##_##OPNUM##_tmp_it; \ - } \ - cout << endl; \ - \ - auto LAYER##_##BLK##_##OPNUM = op::Conv2D(string(#LAYER) + string(#BLK) + string(#OPNUM)) \ - .set_input_x(input, "y") \ - .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight) \ - .set_attr_strides({1, 1, stride[0], stride[1]}) \ - .set_attr_pads(pad) \ - .set_attr_data_format("NCHW"); \ - update_op_format(LAYER##_##BLK##_##OPNUM); - -#define GENERATE_CONSTANT(LAYER, BLK, OPNUM, CONSTNAME) \ - Tensor LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor; \ - float *LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data = new float[LAYER##_##BLK##_##OPNUM##_size]; \ - for (int i = 0; i < (int)LAYER##_##BLK##_##OPNUM##_size; i++) { \ - *(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data + i) = 0.01; \ - } \ - LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetData((uint8_t *)LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data, \ - LAYER##_##BLK##_##OPNUM##_size * sizeof(float)); \ - LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetTensorDesc(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant = \ - op::Constant().set_attr_value(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor); \ - LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - delete[] LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data; - -#define GENERATE_CONV_VAR_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input) \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT); \ - uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize(); \ - auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight"); \ - LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_weight = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_weight"); \ - LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, weight); \ - auto LAYER##_##BLK##_##OPNUM##_weight_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_weight) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_weight_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_weight); \ - auto LAYER##_##BLK##_##OPNUM##_mom_weight_assign = \ - op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_weight) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_weight_constant); \ - \ - input.push_back(LAYER##_##BLK##_##OPNUM##_weight); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mom_weight); - -// (int out_channels, Operator& input) -#define GENERATE_BN_VAR(LAYER, BLK, OPNUM, out_channels, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT); \ - auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale"); \ - LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_scale = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale"); \ - LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b"); \ - LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b"); \ - LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean"); \ - LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - auto LAYER##_##BLK##_##OPNUM##_variance = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance"); \ - LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM = op::FusedBatchNorm(string(#LAYER) + string(#BLK) + string(#OPNUM)) \ - .set_input_x(input, "y") \ - .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale) \ - .set_input_b(LAYER##_##BLK##_##OPNUM##_b) \ - .set_input_mean(LAYER##_##BLK##_##OPNUM##_mean) \ - .set_input_variance(LAYER##_##BLK##_##OPNUM##_variance) \ - .set_attr_mode(1) \ - .set_attr_epsilon(1e-5) \ - .set_attr_is_training(true); - -#define GENERATE_BN_VAR_VAR(LAYER, BLK, OPNUM, out_channels, input) \ - TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT); \ - uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize(); \ - auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale"); \ - LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_scale = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale"); \ - LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b"); \ - LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b"); \ - LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean"); \ - LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - auto LAYER##_##BLK##_##OPNUM##_variance = \ - op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance"); \ - LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, scale); \ - \ - auto LAYER##_##BLK##_##OPNUM##_scale_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_scale) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_scale_constant); \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_scale); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_scale_assign = \ - op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_scale) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_scale_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, b); \ - \ - auto LAYER##_##BLK##_##OPNUM##_b_assign = \ - op::Assign().set_input_ref(LAYER##_##BLK##_##OPNUM##_b).set_input_value(LAYER##_##BLK##_##OPNUM##_b_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_b); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mom_b_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_b) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_b_constant); \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, mean); \ - \ - auto LAYER##_##BLK##_##OPNUM##_mean_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_mean) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_mean_constant); \ - \ - GENERATE_CONSTANT(LAYER, BLK, OPNUM, variance); \ - \ - auto LAYER##_##BLK##_##OPNUM##_variance_assign = op::Assign() \ - .set_input_ref(LAYER##_##BLK##_##OPNUM##_variance) \ - .set_input_value(LAYER##_##BLK##_##OPNUM##_variance_constant); \ - \ - input.push_back(LAYER##_##BLK##_##OPNUM##_scale); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mom_scale); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_b); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mom_b); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_mean); \ - input.push_back(LAYER##_##BLK##_##OPNUM##_variance); - -// (int out_channels, Operator& input) -#define GENERATE_RELU_VAR(LAYER, BLK, OPNUM, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - auto LAYER##_##BLK##_##OPNUM = op::Relu(string(#LAYER) + string(#BLK) + string(#OPNUM)).set_input_x(input, "y"); - -// (int out_channels, Operator& input) -#define GENERATE_MAXPOOL_VAR(LAYER, BLK, OPNUM, input) \ - auto &LAYER##_##BLK##_##OPNUM##_input = input; \ - \ - auto LAYER##_##BLK##_##OPNUM = op::MaxPoolWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM)) \ - .set_input_x(input, "y") \ - .set_attr_ksize({1, 3, 3, 1}) \ - .set_attr_padding("SAME") \ - .set_attr_strides({1, 2, 2, 1}); - -// (int out_channels, Operator& input) -#define GENERATE_ADD_VAR(LAYER, BLK, OPNUM, input_x1, input_x2) \ - auto LAYER##_##BLK##_##OPNUM = \ - op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM)).set_input_x1(input_x1, "y").set_input_x2(input_x2, "y"); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) -#define MAKE_RESIDUAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input) \ - auto &LAYER##_##BLK##_input = input; \ - auto &LAYER##_##BLK##_stride = stride; \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1); \ - GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - LAYER##_##BLK##_relu1); \ - GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2); \ - GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, \ - LAYER##_##BLK##_relu2); \ - GENERATE_BN_VAR(LAYER, BLK, bn3, out_channels, LAYER##_##BLK##_conv3); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR(LAYER, BLK, bn4, out_channels, LAYER##_##BLK##_conv4); \ - \ - GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, LAYER##_##BLK##_bn4); \ - GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5); \ - \ - auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5; \ - auto &LAYER##_##BLK##_output_label = "y"; - -#define MAKE_RESIDUAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input) \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn4, out_channels, input); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) -#define MAKE_NORMAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input) \ - auto &LAYER##_##BLK##_input = input; \ - auto &LAYER##_##BLK##_stride = stride; \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1); \ - GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - LAYER##_##BLK##_relu1); \ - GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2); \ - GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2); \ - \ - GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, \ - LAYER##_##BLK##_relu2); \ - GENERATE_BN_VAR(LAYER, BLK, bn3, out_channels, LAYER##_##BLK##_conv3); \ - \ - GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, input); \ - GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5); \ - \ - auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5; \ - auto &LAYER##_##BLK##_output_label = "y"; - -#define MAKE_NORMAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input) \ - int LAYER##_##BLK##_out_chls = out_channels / 4; \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \ - input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input); \ - \ - GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input); \ - GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) -#define MAKE_RESIDUAL_LAYER(LAYER, in_channels, out_channels, stride, input) \ - MAKE_RESIDUAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \ - \ - auto &LAYER##_output = LAYER##_blk1_output; \ - auto &LAYER##_output_label = LAYER##_blk1_output_label; - -#define MAKE_RESIDUAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \ - MAKE_RESIDUAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input); - -// (int in_channels, int out_channels,vector stride{1,1}, Operator& input) -#define MAKE_NORMAL_LAYER(LAYER, in_channels, out_channels, stride, input) \ - MAKE_NORMAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \ - \ - auto &LAYER##_output = LAYER##_blk1_output; \ - auto &LAYER##_output_label = LAYER##_blk1_output_label; - -#define MAKE_NORMAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \ - MAKE_NORMAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input); - -#define MAKE_RESNET50(input) \ - MAKE_RESIDUAL_LAYER(layer1, 64, 256, stride_1, input) \ - MAKE_NORMAL_LAYER(layer2, 256, 256, stride_1, layer1_output) \ - MAKE_NORMAL_LAYER(layer3, 256, 256, stride_1, layer2_output) \ - MAKE_RESIDUAL_LAYER(layer4, 256, 512, stride_2, layer3_output) \ - MAKE_NORMAL_LAYER(layer5, 512, 512, stride_1, layer4_output) \ - MAKE_NORMAL_LAYER(layer6, 512, 512, stride_1, layer5_output) \ - MAKE_NORMAL_LAYER(layer7, 512, 512, stride_1, layer6_output) \ - MAKE_RESIDUAL_LAYER(layer8, 512, 1024, stride_2, layer7_output) \ - MAKE_NORMAL_LAYER(layer9, 1024, 1024, stride_1, layer8_output) \ - MAKE_NORMAL_LAYER(layer10, 1024, 1024, stride_1, layer9_output) \ - MAKE_NORMAL_LAYER(layer11, 1024, 1024, stride_1, layer10_output) \ - MAKE_NORMAL_LAYER(layer12, 1024, 1024, stride_1, layer11_output) \ - MAKE_NORMAL_LAYER(layer13, 1024, 1024, stride_1, layer12_output) \ - MAKE_RESIDUAL_LAYER(layer14, 1024, 2048, stride_2, layer13_output) \ - MAKE_NORMAL_LAYER(layer15, 2048, 2048, stride_1, layer14_output) \ - MAKE_NORMAL_LAYER(layer16, 2048, 2048, stride_1, layer15_output) \ - \ - auto &resnet50_output = layer16_output; \ - auto &resnet50_output_label = layer16_output_label; - -#define MAKE_RESNET50_VAR(inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer1, 64, 256, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer2, 256, 256, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer3, 256, 256, stride_1, inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer4, 256, 512, stride_2, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer5, 512, 512, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer6, 512, 512, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer7, 512, 512, stride_1, inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer8, 512, 1024, stride_2, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer9, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer10, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer11, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer12, 1024, 1024, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer13, 1024, 1024, stride_1, inputs) \ - MAKE_RESIDUAL_LAYER_VAR(layer14, 1024, 2048, stride_2, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer15, 2048, 2048, stride_1, inputs) \ - MAKE_NORMAL_LAYER_VAR(layer16, 2048, 2048, stride_1, inputs) \ -//--------------------------------------------------------------------------------------------- - -// (Operator& input) -#define GENERATE_BIASADD_GRAD(LAYER, BLK, OPNUM, input) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::BiasAddGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_x(input, input.name_out_dx()); - -// (Operator& input) -#define GENERATE_MATMUL_GRAD(LAYER, BLK, OPNUM, input) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::MatMul(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_x1(input); - -// (Operator& input) -#define GENERATE_RESHAPE_GRAD(LAYER, BLK, OPNUM, input) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::Reshape(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_tensor(input); - -// (Operator& input_grad, Operator& input_maxpool) -#define GENERATE_MAXPOOL_GRAD(LAYER, BLK, OPNUM, input_grad, input_maxpool) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::MaxPoolGradWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \ - .set_input_grad(input_grad) \ - .set_input_argmax(input_maxpool, input_maxpool.name_out_argmax()) \ - .set_attr_ksize({1, 1, 3, 3}) \ - .set_attr_strides({1, 1, 2, 2}) \ - .set_attr_padding("SAME"); - -// (Operator& input_dy) -#define GENERATE_RELU_GRAD(LAYER, BLK, OPNUM, input_dy, dy_label) \ - auto LAYER##_##BLK##_##OPNUM##_grad = op::ReluGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_gradients(input_dy, dy_label) \ - .set_input_features(LAYER##_##BLK##_##OPNUM, "y"); - -// (Operator& input_dy) -#define GENERATE_BN_GRAD(LAYER, BLK, OPNUM, input_dy) \ - auto LAYER##_##BLK##_##OPNUM##_grad = \ - op::FusedBatchNormGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_dy(input_dy, "backprops") \ - .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \ - .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale) \ - .set_input_save_mean(LAYER##_##BLK##_##OPNUM, "save_mean") \ - .set_input_save_inv_variance(LAYER##_##BLK##_##OPNUM, "save_inv_variance") \ - .set_attr_epsilon(0.0001); \ - \ - auto LAYER##_##BLK##_##OPNUM##_momentum_scale = \ - op::ApplyMomentum() \ - .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_scale) \ - .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_scale()) \ - .set_input_lr(label1) \ - .set_input_momentum(label1) \ - .set_input_var(LAYER##_##BLK##_##OPNUM##_scale); \ - \ - auto LAYER##_##BLK##_##OPNUM##_momentum_b = \ - op::ApplyMomentum() \ - .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_b) \ - .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_bias()) \ - .set_input_lr(label1) \ - .set_input_momentum(label1) \ - .set_input_var(LAYER##_##BLK##_##OPNUM##_b); - -// (Operator& input) -#define GENERATE_CONV_PROP_FILTER(LAYER, BLK, OPNUM, input_bngrad, stride) \ - auto LAYER##_##BLK##_##OPNUM##_propfilter = \ - op::Conv2DBackpropFilterD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propfilter")) \ - .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \ - .set_attr_filter_size(LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetDims()) \ - .set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx()) \ - .set_attr_strides(stride) \ - .set_attr_pads({1, 1, 1, 1}); \ - \ - update_op_format(LAYER##_##BLK##_##OPNUM##_propfilter); \ - auto LAYER##_##BLK##_##OPNUM##_momentum_weight = op::ApplyMomentum() \ - .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_weight) \ - .set_input_grad(LAYER##_##BLK##_##OPNUM##_propfilter) \ - .set_input_lr(label1) \ - .set_input_momentum(label1) \ - .set_input_var(LAYER##_##BLK##_##OPNUM##_weight); - -///.set_attr_input_size({input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(0),LAYER##_##BLK##_##OPNUM##_weight.GetOutputDesc().GetShape().GetDim(1), -///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(2)*stride[2], -///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(3)*stride[3]}) -#define GENERATE_CONV_PROP_INPUT(LAYER, BLK, OPNUM, input_bngrad, stride) \ - auto LAYER##_##BLK##_##OPNUM##_propinput = \ - op::Conv2DBackpropInputD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propinput")) \ - .set_attr_input_size(LAYER##_##BLK##_##OPNUM##_input.GetOutputDesc("y").GetShape().GetDims()) \ - .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight) \ - .set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx()) \ - .set_attr_strides(stride) \ - .set_attr_pads({1, 1, 1, 1}); \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput" \ - << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(3) * stride[3] << endl; \ - cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput" \ - << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(2) * stride[2] << endl; \ - \ - update_op_format(LAYER##_##BLK##_##OPNUM##_propinput); \ - auto &LAYER##_##BLK##_##OPNUM##_propinput_label = "y" - -// (int out_channels, Operator& input) -#define GENERATE_ADD_GRAD(LAYER, BLK, OPNUM, input_x1, input_x1_label, input_x2, input_x2_label) \ - auto LAYER##_##BLK##_##OPNUM##_grad = op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ - .set_input_x1(input_x1, input_x1_label) \ - .set_input_x2(input_x2, input_x2_label); - -// (Operator& input) -#define MAKE_RESIDUAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label) \ - GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label); \ - \ - GENERATE_BN_GRAD(LAYER, BLK, bn4, LAYER##_##BLK##_relu5_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride); \ - \ - GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - \ - GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \ - LAYER##_##BLK##_conv4_propinput, LAYER##_##BLK##_conv4_propinput_label); \ - \ - auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad; \ - auto &LAYER##_##BLK##_grad_output_label = "y" - -// (Operator& input) -#define MAKE_NORMAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label) \ - GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label); \ - \ - GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ - \ - GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y"); \ - GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad); \ - GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ - \ - GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \ - input_dy, dy_label); \ - \ - auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad; \ - auto &LAYER##_##BLK##_grad_output_label = "y" - -// (Operator& input_dy) -#define MAKE_RESIDUAL_LAYER_GRAD(LAYER, input_dy, dy_label) \ - MAKE_RESIDUAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \ - \ - auto &LAYER##_grad_output = LAYER##_blk1_grad_output; \ - auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label; - -// (Operator& input_dy) -#define MAKE_NORMAL_LAYER_GRAD(LAYER, input_dy, dy_label) \ - MAKE_NORMAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \ - \ - auto &LAYER##_grad_output = LAYER##_blk1_grad_output; \ - auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label; - -#define MAKE_RESNET50_GRAD(input_dy, dy_label) \ - MAKE_NORMAL_LAYER_GRAD(layer16, input_dy, dy_label) \ - MAKE_NORMAL_LAYER_GRAD(layer15, layer16_grad_output, layer16_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer14, layer15_grad_output, layer15_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer13, layer14_grad_output, layer14_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer12, layer13_grad_output, layer13_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer11, layer12_grad_output, layer12_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer10, layer11_grad_output, layer11_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer9, layer10_grad_output, layer10_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer8, layer9_grad_output, layer9_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer7, layer8_grad_output, layer8_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer6, layer7_grad_output, layer7_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer5, layer6_grad_output, layer6_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer4, layer5_grad_output, layer5_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer3, layer4_grad_output, layer4_grad_output_label) \ - MAKE_NORMAL_LAYER_GRAD(layer2, layer3_grad_output, layer3_grad_output_label) \ - MAKE_RESIDUAL_LAYER_GRAD(layer1, layer2_grad_output, layer2_grad_output_label) \ - \ - auto &resnet50_grad_output = layer1_grad_output; \ - auto &resnet50_grad_output_label = layer1_grad_output_label; - -bool resnet50(Graph &graph) { - auto data = op::Data().set_attr_index(0); - auto data1 = op::Data().set_attr_index(1); - TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT); - data.update_output_desc_y(shape_desc); - - TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT); - - auto var = op::Variable("conv2d_var"); - var.update_output_desc_y(desc); - var.update_input_desc_x(desc); - - auto varw1 = op::Variable("conv2d_varw1"); - varw1.update_output_desc_y(desc); - - auto conv2d = op::Conv2D("Translate") - .set_input_x(data) - .set_input_filter(var) - .set_attr_strides({1, 1, 2, 2}) - .set_attr_pads({2, 3, 2, 3}) - .set_attr_data_format("NCHW"); - TensorDesc desc_y; - desc_y.SetFormat(FORMAT_NCHW); // shape: 32 64 112 112 - conv2d.update_output_desc_y(desc_y); - - TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT); - auto var1 = op::Variable("bn_var1"); - var1.update_output_desc_y(desc1); - - auto var2 = op::Variable("bn_var2"); - var2.update_output_desc_y(desc1); - - auto var3 = op::Variable("bn_var3"); - var3.update_output_desc_y(desc1); - - auto var4 = op::Variable("bn_var4"); - var4.update_output_desc_y(desc1); - - TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT); - - auto var5 = op::Variable("var5"); - var5.update_output_desc_y(desc2); - - auto var6 = op::Variable("var6"); - var6.update_output_desc_y(desc2); - - TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - - auto label1 = op::Variable("label1"); - label1.update_output_desc_y(desclabel); - - TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - auto matvar = op::Variable("matvar"); - matvar.update_output_desc_y(descmatlabel); - - auto matvar1 = op::Variable("matvar1"); - matvar1.update_output_desc_y(descmatlabel); - - auto bn = op::FusedBatchNorm() - .set_input_x(conv2d, "y") - .set_input_scale(var1) - .set_input_b(var2) - .set_input_mean(var3) - .set_input_variance(var4) - .set_attr_mode(1) - .set_attr_epsilon(1e-5) - .set_attr_is_training(true) - .set_attr_is_training_fusion(true) - .set_attr_moving_average_fraction(994352128); - - auto relu = op::Relu().set_input_x(bn, "y"); - - auto maxpool = op::MaxPoolWithArgmax() - .set_input_x(relu, "y") - .set_attr_ksize({1, 3, 3, 1}) - .set_attr_padding("SAME") - .set_attr_strides({1, 2, 2, 1}); - - MAKE_RESNET50(maxpool); - std::vector inputs{data}; //,var,var1,layer1_blk1_bn1_b,var3,var4}; - std::vector outputs{}; - - graph.SetInputs(inputs).SetOutputs(outputs); - return true; -} - -#define GENERATE_CONSTANT_USE_DESC(OPNUM, desc, val) \ - uint32_t OPNUM##_size = desc.GetShape().GetShapeSize(); \ - Tensor OPNUM##_tensor; \ - OPNUM##_tensor.SetTensorDesc(desc); \ - if (desc.GetDataType() == DT_FLOAT) { \ - float *OPNUM##_data = new float[OPNUM##_size]; \ - for (int i = 0; i < (int)OPNUM##_size; i++) { \ - *(OPNUM##_data + i) = val; \ - } \ - OPNUM##_tensor.SetData((uint8_t *)OPNUM##_data, OPNUM##_size * sizeof(float)); \ - delete[] OPNUM##_data; \ - } \ - if (desc.GetDataType() == DT_INT64) { \ - int64_t *OPNUM##_data = new int64_t[OPNUM##_size]; \ - for (int i = 0; i < (int)OPNUM##_size; i++) { \ - *(OPNUM##_data + i) = val; \ - } \ - OPNUM##_tensor.SetData((uint8_t *)OPNUM##_data, OPNUM##_size * sizeof(int64_t)); \ - delete[] OPNUM##_data; \ - } \ - auto OPNUM##_constant = op::Constant().set_attr_value(OPNUM##_tensor); \ - OPNUM##_constant.update_output_desc_y(desc); - -#define GENERATE_VAR_LAYER(OPNUM, desc, input) \ - auto OPNUM##_weight = op::Variable(string(#OPNUM)); \ - OPNUM##_weight.update_output_desc_y(desc); \ - auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \ - \ - input.push_back(OPNUM##_weight); - -#define GENERATE_VAR_LAYER_1(OPNUM, desc, var_format, input, name) \ - auto OPNUM##_weight = op::Variable(string(name)); \ - OPNUM##_weight.update_output_desc_y(desc); \ - auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \ - \ - input.push_back(OPNUM##_weight); - -int BuildInitVarGraph(Graph &graph) { - std::vector inputs{}; - std::vector outputs{}; - - TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(conv2d_var, desc, 0.01); - GENERATE_VAR_LAYER(conv2d_var, desc, inputs); - - GENERATE_CONSTANT_USE_DESC(conv2d_varw1, desc, 0.01); - GENERATE_VAR_LAYER(conv2d_varw1, desc, inputs); - - TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(bn_var1, desc1, 0.01); - GENERATE_VAR_LAYER(bn_var1, desc1, inputs); - GENERATE_CONSTANT_USE_DESC(bn_var2, desc1, 0.01); - GENERATE_VAR_LAYER(bn_var2, desc1, inputs); - GENERATE_CONSTANT_USE_DESC(bn_var3, desc1, 0.01); - GENERATE_VAR_LAYER(bn_var3, desc1, inputs); - GENERATE_CONSTANT_USE_DESC(bn_var4, desc1, 0.01); - GENERATE_VAR_LAYER(bn_var4, desc1, inputs); - - TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(var5, desc2, 0.01); - GENERATE_VAR_LAYER(var5, desc2, inputs); - GENERATE_CONSTANT_USE_DESC(var6, desc2, 0.01); - GENERATE_VAR_LAYER(var6, desc2, inputs); - - TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(label1, desclabel, 0.1); - GENERATE_VAR_LAYER(label1, desclabel, inputs); - - TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); - GENERATE_CONSTANT_USE_DESC(matvar, descmatlabel, 0.01); - GENERATE_VAR_LAYER(matvar, descmatlabel, inputs); - GENERATE_CONSTANT_USE_DESC(matvar1, descmatlabel, 0.01); - GENERATE_VAR_LAYER(matvar1, descmatlabel, inputs); - - MAKE_RESNET50_VAR(inputs); - - TensorDesc ctrl(ge::Shape({1, 1, 1, 1}), FORMAT_NCHW, DT_INT64); - - GENERATE_CONSTANT_USE_DESC(iterations_per_loop, ctrl, 100); - GENERATE_VAR_LAYER_1(iterations_per_loop, ctrl, "4D", inputs, "npu_runconfig/iterations_per_loop"); - GENERATE_CONSTANT_USE_DESC(loop_cond, ctrl, 0); - GENERATE_VAR_LAYER_1(loop_cond, ctrl, "4D", inputs, "npu_runconfig/loop_cond"); - GENERATE_CONSTANT_USE_DESC(one, ctrl, 1); - GENERATE_VAR_LAYER_1(one, ctrl, "4D", inputs, "npu_runconfig/one"); - GENERATE_CONSTANT_USE_DESC(zero, ctrl, 0); - GENERATE_VAR_LAYER_1(zero, ctrl, "4D", inputs, "npu_runconfig/zero"); - - graph.SetInputs(inputs).SetOutputs(outputs); - return 0; -} -int TestBuildGraphTest(Func fun, Graph &graph, vector &inputs, vector &outputs) { - bool graph_ret = fun(graph); - ge::Tensor shapeTensor; - TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT); - uint32_t sizeshape = shape_desc.GetShape().GetShapeSize(); - printf("[test] desc size filter shape:%u\n", sizeshape); - shapeTensor.SetTensorDesc(shape_desc); - vector dataValuec; - for (int i = 0; i < sizeshape; i++) { - dataValuec.push_back(1); - } - - shapeTensor.SetData((uint8_t *)dataValuec.data(), 4 * sizeshape); - inputs.push_back(shapeTensor); - - ge::Tensor shapeTensor1; - TensorDesc shape_desc1(ge::Shape({1, 32, 1, 1}), FORMAT_NCHW, DT_FLOAT); - uint32_t sizeshape1 = shape_desc1.GetShape().GetShapeSize(); - printf("[test] desc size filter shape:%u\n", sizeshape1); - shapeTensor1.SetTensorDesc(shape_desc1); - vector dataValuec1; - for (int i = 0; i < sizeshape1; i++) { - dataValuec1.push_back(1); - } - - shapeTensor1.SetData((uint8_t *)dataValuec1.data(), 4 * sizeshape1); - - return 0; -} -int runTrainGraph(Func fun, int loopCount) { - printf("GE BBIT begin...\n"); - std::chrono::system_clock::time_point start = std::chrono::system_clock::now(); - - std::map ge_options = { - {"device_id", "0"}, {"rank_table_file", ""}, {"graphType", "1"}, {"ge.graphRunMode", "2"}}; - - std::map session_options = {{"a", "b"}, {TRAIN_FLAG, "1"}}; - - ge::Status ret; - - // init ge - ret = GEInitialize_api_new("train", "fe,plugin"); - printf("ge::GEInitialize ret:%d\n", ret); - - // init session - ge::Session session(session_options); - - int graphId_initvar = 1; - ge::Graph graph_initvar("initVarGraph"); - bool graph_ret = BuildInitVarGraph(graph_initvar); - - // session addgraph - int graphId = 0; - - // build graph - ge::Graph graph("bigGraph"); - std::vector inputs; - ge::Tensor outputTensor; - std::vector outputs; - graph_ret = TestBuildGraphTest(fun, graph, inputs, outputs); - printf("TestReluGrad ret:%d\n", graph_ret); - - ret = session.AddGraph(graphId_initvar, graph_initvar); - printf("session.AddVarGraph ret:%d\n", ret); - if (ret) return ret; - - ret = session.AddGraph(graphId, graph); - printf("session.AddGraph ret:%d\n", ret); - if (ret) return ret; - - std::vector inputs1; - std::vector outputs1; - ret = session.RunGraph(graphId_initvar, inputs1, outputs1); - - if (ret != SUCCESS) { - return ret; - } - // add loop for test of stabilty: - for (int i = 0; i < loopCount; i++) { - // session rungraph - printf("loopCount:%d\n", loopCount); - ret = session.RunGraph(graphId, inputs, outputs); - printf("session.RunGraph ret:%d\n", ret); - if (ret) return ret; - - // define 99999 as loop forever - if (loopCount == 99999) i = 0; - } - std::chrono::system_clock::time_point end = std::chrono::system_clock::now(); - auto millisecondsduration = std::chrono::duration_cast(end - start); - auto ms = millisecondsduration.count(); - std::stringstream ss; - ss << ms << "ms"; - std::string run_time = ss.str(); - printf("run time is : %s \n", run_time.c_str()); - - return 0; -} - -int main(int argc, char *argv[]) { - // add loop for test of stabilty: - int loopCount = 1; - if (argc >= 2) loopCount = atoi(argv[1]); - - Status ret = SUCCESS; - ret = runTrainGraph(resnet50, loopCount); - if (ret == SUCCESS) { - std::cout << "[train resnet50 success]" << std::endl; - } else { - std::cout << "!!! train resnet50 fail !!!" << std::endl; - } - return ret; -} diff --git a/tests/st/test_ge_st.py b/tests/st/test_ge_st.py deleted file mode 100644 index b5479cfc..00000000 --- a/tests/st/test_ge_st.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -ge st test. -""" -import pytest -import subprocess -import os - -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_card -@pytest.mark.component_ge -def test_resnet50_train(): - ge_st_dir=os.environ.get('GE_ST_DIR', - '/home/jenkins/workspace/release_pkg/gate/graphengine_lib') - ge_lib_dir=os.environ.get('GRAPHENGINE_LIB', '/home/jenkins/workspace/release_pkg/gate/graphengine_lib') - - real_pythonpath=os.environ.get('REAL_PYTHONPATH') - pythonpath=os.environ.get('PYTHONPATH') - if real_pythonpath: - if pythonpath: - os.environ['PYTHONPATH']=real_pythonpath+':'+pythonpath - else: - os.environ['PYTHONPATH']=real_pythonpath - print('PYTHONPATH: '+os.environ.get('PYTHONPATH')) - - os.environ['ASCEND_OPP_PATH']='/usr/local/Ascend/opp' - os.environ['ASCEND_ENGINE_PATH']='/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:' \ - '/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libfe.so:' \ - '/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/librts_engine.so:'+ \ - ge_lib_dir + '/libge_local_engine.so' - print('ASCEND_OPP_PATH: '+os.environ.get('ASCEND_OPP_PATH')) - print('ASCEND_ENGINE_PATH: '+os.environ.get('ASCEND_ENGINE_PATH')) - print('LD_LIBRARY_PATH: '+os.environ.get('LD_LIBRARY_PATH')) - - cmd=ge_st_dir + '/st_resnet50_train' - print('cmd: '+cmd) - os.environ['SLOG_PRINT_TO_STDOUT']="1" - ret=subprocess.call([cmd], shell=True) - assert ret==0 - From e9e5dd7b9dcaf898a5d4e942a50ac0f2deb05bf6 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Mon, 7 Dec 2020 17:14:14 +0800 Subject: [PATCH 5/9] fix geruntime missing files and error codes --- ge/ge_runtime/CMakeLists.txt | 3 +++ ge/ge_runtime/runtime_model.cc | 4 ++-- ge/ge_runtime/task/task.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt index 42d3b344..ce1b89ea 100644 --- a/ge/ge_runtime/CMakeLists.txt +++ b/ge/ge_runtime/CMakeLists.txt @@ -13,6 +13,9 @@ set(GE_SRC_LIST "task/hccl_task.cc" "task/memcpy_async_task.cc" "task/profiler_task.cc" + "task/label_goto_task.cc" + "task/label_set_task.cc" + "task/label_switch_task.cc" ) add_library(ge_runtime SHARED ${GE_SRC_LIST}) diff --git a/ge/ge_runtime/runtime_model.cc b/ge/ge_runtime/runtime_model.cc index 0ff56ef1..fb0f3e85 100644 --- a/ge/ge_runtime/runtime_model.cc +++ b/ge/ge_runtime/runtime_model.cc @@ -307,8 +307,8 @@ bool RuntimeModel::Run() { ret = rtStreamSynchronize(rt_model_stream_); if (ret != RT_ERROR_NONE) { - if (ret == RT_ERROR_END_OF_SEQUENCE) { - GELOGI("Model stream RT_ERROR_END_OF_SEQUENCE signal received, ret = 0x%X", ret); + if (ret == ACL_ERROR_RT_END_OF_SEQUENCE) { + GELOGI("Model stream ACL_ERROR_RT_END_OF_SEQUENCE signal received, ret = 0x%X", ret); return true; } GELOGE(RT_FAILED, "Model stream sync failed, ret = 0x%X", ret); diff --git a/ge/ge_runtime/task/task.h b/ge/ge_runtime/task/task.h index 6c4df248..c255fd22 100644 --- a/ge/ge_runtime/task/task.h +++ b/ge/ge_runtime/task/task.h @@ -24,6 +24,7 @@ #include "runtime/rt_model.h" #include "ge_runtime/model_context.h" #include "ge_runtime/task_info.h" +#include "external/runtime/rt_error_codes.h" namespace ge { namespace model_runner { From b8e82bb16e1b72d5aee31a6f7354d4efa0b4f4e3 Mon Sep 17 00:00:00 2001 From: lichun Date: Mon, 7 Dec 2020 19:57:03 +0800 Subject: [PATCH 6/9] inference supports dynamic shape --- ge/common/ge/op_tiling_manager.cc | 4 + ge/common/ge/op_tiling_manager.h | 1 + ge/executor/CMakeLists.txt | 94 ++++++++- ge/executor/ge_executor.cc | 73 ++++++- ge/executor/module.mk | 84 +++++++- ge/ge_local_engine/CMakeLists.txt | 2 +- ge/ge_local_engine/engine/host_cpu_engine.cc | 10 +- ge/ge_local_engine/engine/host_cpu_engine.h | 2 +- ge/graph/build/graph_builder.cc | 51 +++++ ge/graph/load/graph_loader.cc | 7 +- ge/graph/load/graph_loader.h | 3 +- .../load/new_model_manager/davinci_model.cc | 88 ++++---- .../load/new_model_manager/davinci_model.h | 7 +- .../load/new_model_manager/model_manager.cc | 48 ++++- .../load/new_model_manager/model_manager.h | 5 +- ge/graph/partition/dynamic_shape_partition.cc | 47 ++++- ge/graph/partition/dynamic_shape_partition.h | 1 + ge/graph/passes/pass_utils.cc | 4 - .../passes/transop_breadth_fusion_pass.cc | 2 +- ge/host_cpu_engine/CMakeLists.txt | 6 +- ge/host_kernels/floordiv_kernel.cc | 4 +- ge/host_kernels/floordiv_kernel.h | 4 - ge/host_kernels/ssd_prior_box_kernel.cc | 6 +- ge/hybrid/executor/hybrid_execution_context.h | 2 +- .../executor/hybrid_model_async_executor.cc | 38 ++++ .../executor/hybrid_model_async_executor.h | 5 + ge/hybrid/executor/hybrid_profiler.h | 2 +- ge/hybrid/executor/node_state.h | 2 +- ge/hybrid/hybrid_davinci_model.cc | 79 ++++++++ ge/hybrid/hybrid_davinci_model.h | 21 ++ ge/hybrid/hybrid_davinci_model_stub.cc | 32 +++ ge/hybrid/model/hybrid_model.cc | 188 +++++++++++++++++- ge/hybrid/model/hybrid_model.h | 26 +++ ge/hybrid/model/hybrid_model_builder.cc | 31 ++- .../node_executor/aicore/aicore_op_task.cc | 56 ++++++ .../node_executor/aicore/aicore_op_task.h | 1 + .../aicore/aicore_task_compiler.h | 2 +- .../node_executor/aicpu/aicpu_node_executor.h | 2 + .../controlop/control_op_executor.h | 1 + .../ge_local/ge_local_node_executor.cc | 2 +- .../host_cpu/kernel/assign_kernel.cc | 1 - ge/hybrid/node_executor/node_executor.cc | 1 - .../partitioned_call_node_executor.h | 1 - ge/hybrid/node_executor/task_context.h | 2 +- inc/framework/executor/ge_executor.h | 16 ++ 45 files changed, 976 insertions(+), 88 deletions(-) diff --git a/ge/common/ge/op_tiling_manager.cc b/ge/common/ge/op_tiling_manager.cc index 9b5ba2d7..db959368 100644 --- a/ge/common/ge/op_tiling_manager.cc +++ b/ge/common/ge/op_tiling_manager.cc @@ -88,4 +88,8 @@ void OpTilingManager::LoadSo() { } } +OpTilingManager &OpTilingManager::GetInstance() { + static OpTilingManager instance; + return instance; +} } // namespace ge diff --git a/ge/common/ge/op_tiling_manager.h b/ge/common/ge/op_tiling_manager.h index d4e7f34e..17761969 100644 --- a/ge/common/ge/op_tiling_manager.h +++ b/ge/common/ge/op_tiling_manager.h @@ -25,6 +25,7 @@ using SoToHandleMap = std::map; class OpTilingManager { public: OpTilingManager() = default; + static OpTilingManager &GetInstance(); ~OpTilingManager(); void LoadSo(); diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index de8025f3..d7dfdc84 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -72,7 +72,89 @@ set(SRC_LIST "../single_op/task/tbe_task_builder.cc" "../single_op/task/aicpu_task_builder.cc" "../single_op/task/aicpu_kernel_task_builder.cc" - "../hybrid/hybrid_davinci_model_stub.cc" + "../hybrid/common/tensor_value.cc" + "../hybrid/common/npu_memory_allocator.cc" + "../hybrid/executor/rt_callback_manager.cc" + "../hybrid/executor/node_state.cc" + "../hybrid/executor/node_done_manager.cc" + "../hybrid/executor/hybrid_profiler.cc" + "../hybrid/executor/hybrid_model_executor.cc" + "../hybrid/executor/hybrid_model_async_executor.cc" + "../hybrid/executor/hybrid_execution_context.cc" + "../hybrid/executor/subgraph_context.cc" + "../hybrid/executor/subgraph_executor.cc" + "../hybrid/executor/worker/task_compile_engine.cc" + "../hybrid/executor/worker/shape_inference_engine.cc" + "../hybrid/executor/worker/execution_engine.cc" + "../hybrid/model/hybrid_model.cc" + "../hybrid/model/hybrid_model_builder.cc" + "../hybrid/model/node_item.cc" + "../hybrid/model/graph_item.cc" + "../hybrid/node_executor/aicore/aicore_node_executor.cc" + "../hybrid/node_executor/aicore/aicore_op_task.cc" + "../hybrid/node_executor/aicore/aicore_task_builder.cc" + "../hybrid/node_executor/aicpu/aicpu_node_executor.cc" + "../hybrid/node_executor/compiledsubgraph/known_node_executor.cc" + "../hybrid/node_executor/ge_local/ge_local_node_executor.cc" + "../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" + "../hybrid/node_executor/host_cpu/kernel_factory.cc" + "../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" + "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" + "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" + "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" + "../hybrid/node_executor/controlop/control_op_executor.cc" + "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" + "../hybrid/node_executor/rts/rts_node_executor.cc" + "../hybrid/node_executor/node_executor.cc" + "../hybrid/node_executor/task_context.cc" + "../hybrid/hybrid_davinci_model.cc" + "../ge_local_engine/engine/host_cpu_engine.cc" + "../graph/common/omg_util.cc" + "../graph/manager/host_mem_manager.cc" + "../graph/build/memory/var_mem_assign_util.cc" + "../host_kernels/transpose_kernel.cc" + "../host_kernels/add_kernel.cc" + "../host_kernels/broadcast_args_kernel.cc" + "../host_kernels/broadcast_gradient_args_kernel.cc" + "../host_kernels/cast_kernel.cc" + "../host_kernels/concat_offset_kernel.cc" + "../host_kernels/concat_v2_kernel.cc" + "../host_kernels/dynamic_stitch_kernel.cc" + "../host_kernels/identity_kernel.cc" + "../host_kernels/empty_kernel.cc" + "../host_kernels/expanddims_kernel.cc" + "../host_kernels/fill_kernel.cc" + "../host_kernels/floordiv_kernel.cc" + "../host_kernels/floormod_kernel.cc" + "../host_kernels/gather_v2_kernel.cc" + "../host_kernels/greater_kernel.cc" + "../host_kernels/kernel_utils.cc" + "../host_kernels/maximum_kernel.cc" + "../host_kernels/mul_kernel.cc" + "../host_kernels/pack_kernel.cc" + "../host_kernels/permute_kernel.cc" + "../host_kernels/range_kernel.cc" + "../host_kernels/rank_kernel.cc" + "../host_kernels/reduce_prod_kernel.cc" + "../host_kernels/reshape_kernel.cc" + "../host_kernels/rsqrt_kernel.cc" + "../host_kernels/shape_kernel.cc" + "../host_kernels/shape_n_kernel.cc" + "../host_kernels/size_kernel.cc" + "../host_kernels/slice_d_kernel.cc" + "../host_kernels/slice_kernel.cc" + "../host_kernels/squeeze_kernel.cc" + "../host_kernels/unsqueeze_kernel.cc" + "../host_kernels/ssd_prior_box_kernel.cc" + "../host_kernels/strided_slice_kernel.cc" + "../host_kernels/sub_kernel.cc" + "../host_kernels/transdata_kernel.cc" + "../host_kernels/unpack_kernel.cc" + "../graph/passes/pass_utils.cc" + "../graph/common/bcast.cc" + "../common/fp16_t.cc" + "../common/formats/format_transfers/format_transfer_transpose.cc" + "../common/formats/utils/formats_trans_utils.cc" ) ######## libge_executor.a ######## @@ -105,9 +187,9 @@ target_include_directories(ge_executor PRIVATE ${CMAKE_BINARY_DIR}/proto/ge #### yellow zone #### ${GE_CODE_DIR}/../inc - ${GE_CODE_DIR}/../inc/cce + ${GE_CODE_DIR}/../inc/cce #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc ) target_link_libraries(ge_executor PRIVATE @@ -147,9 +229,9 @@ target_include_directories(ge_executor_shared PRIVATE ${CMAKE_BINARY_DIR}/proto/ge #### yellow zone #### ${GE_CODE_DIR}/../inc - ${GE_CODE_DIR}/../inc/cce + ${GE_CODE_DIR}/../inc/cce #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc ) target_link_libraries(ge_executor_shared PRIVATE @@ -158,7 +240,7 @@ target_link_libraries(ge_executor_shared PRIVATE -Wl,--no-as-needed ge_common runtime - slog + slog mmpa graph register diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index add95372..3e916916 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -39,6 +39,8 @@ #include "graph/manager/graph_var_manager.h" #include "graph/load/new_model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" +#include "graph/opsproto_manager.h" +#include "ge_local_engine/engine/host_cpu_engine.h" using std::string; using std::vector; @@ -221,6 +223,33 @@ class ModelListenerAdapter : public ModelListener { std::shared_ptr listener; }; +static void InitOpsProtoManger() { + string opsproto_path; + const char *path_env = std::getenv("ASCEND_OPP_PATH"); + if (path_env != nullptr) { + string path = path_env; + string file_path = RealPath(path.c_str()); + if (file_path.empty()) { + GELOGE(FAILED, "File path %s is invalid.", path.c_str()); + return; + } + opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/"); + GELOGI("Get opsproto so path from env : %s", path.c_str()); + } else { + string path_base = PluginManager::GetPath(); + GELOGI("path_base is %s", path_base.c_str()); + path_base = path_base.substr(0, path_base.rfind('/')); + path_base = path_base.substr(0, path_base.rfind('/') + 1); + opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); + } + + GELOGI("Get opsproto path is %s", opsproto_path.c_str()); + OpsProtoManager *manager = OpsProtoManager::Instance(); + map option_tmp; + option_tmp.emplace(std::pair(string("ge.opsProtoLibPath"), opsproto_path)); + (void)manager->Initialize(option_tmp); +} + GeExecutor::GeExecutor() {} Status GeExecutor::Initialize() { @@ -230,6 +259,16 @@ Status GeExecutor::Initialize() { return ge::SUCCESS; } + OpTilingManager::GetInstance().LoadSo(); + + Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize(); + if (initHostCpuEngineStatus != SUCCESS) { + GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine"); + return initHostCpuEngineStatus; + } + + InitOpsProtoManger(); + std::vector mem_type(1, RT_MEMORY_HBM); mem_type.push_back(RT_MEMORY_P2P_DDR); auto ret = MemManager::Instance().Initialize(mem_type); @@ -600,10 +639,16 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { return ACL_ERROR_GE_INTERNAL_ERROR; } - std::shared_ptr davinci_model = ModelManager::GetInstance()->GetModel(model_id); - if (davinci_model != nullptr) { - uint64_t session_id = davinci_model->GetSessionId(); + std::shared_ptr hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + uint64_t session_id = hybrid_davinci_model->GetSessionId(); VarManagerPool::Instance().RemoveVarManager(session_id); + } else { + std::shared_ptr davinci_model = ModelManager::GetInstance()->GetModel(model_id); + if (davinci_model != nullptr) { + uint64_t session_id = davinci_model->GetSessionId(); + VarManagerPool::Instance().RemoveVarManager(session_id); + } } ret = GraphLoader::UnloadModel(model_id); if (ret != SUCCESS) { @@ -933,6 +978,26 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat */ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, ge::RunModelData &run_output_data, bool async_mode) { + std::vector input_desc = {}; + std::vector output_desc = {}; + return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode); +} + +/** +* @ingroup ge +* @brief Synchronous execution of offline model(Do not create thread) +* @param [in] uint32_t model_id: Model ID to execute + void* stream: stream to execute + const domi::InputData *input_data: Model input data + const std::vector &input_desc: Description of model input data + bool async_mode: is asynchronize mode +* @param [out] domi::OutputData *output_data: Model output data +* @param [out] std::vector &output_desc: Description of model output data +* @return SUCCESS handle successfully / others handle failed +*/ +Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, + const std::vector &input_desc, ge::RunModelData &run_output_data, + std::vector &output_desc, bool async_mode) { if (!isInit_) { GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return ACL_ERROR_GE_EXEC_NOT_INIT; @@ -957,7 +1022,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel } } - return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, output_data); + return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc); } /** diff --git a/ge/executor/module.mk b/ge/executor/module.mk index 4a0188be..9566ca64 100644 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -61,9 +61,91 @@ local_ge_executor_src_files := \ ../single_op/task/tbe_task_builder.cc \ ../single_op/task/aicpu_task_builder.cc \ ../single_op/task/aicpu_kernel_task_builder.cc \ - ../hybrid/hybrid_davinci_model_stub.cc\ ../hybrid/node_executor/aicpu/aicpu_ext_info.cc \ ../graph/common/local_context.cc \ + ../hybrid/common/tensor_value.cc \ + ../hybrid/common/npu_memory_allocator.cc \ + ../hybrid/executor/rt_callback_manager.cc \ + ../hybrid/executor/node_state.cc \ + ../hybrid/executor/node_done_manager.cc \ + ../hybrid/executor/hybrid_profiler.cc \ + ../hybrid/executor/hybrid_model_executor.cc \ + ../hybrid/executor/hybrid_model_async_executor.cc \ + ../hybrid/executor/hybrid_execution_context.cc \ + ../hybrid/executor/subgraph_context.cc \ + ../hybrid/executor/subgraph_executor.cc \ + ../hybrid/executor/worker/task_compile_engine.cc \ + ../hybrid/executor/worker/shape_inference_engine.cc \ + ../hybrid/executor/worker/execution_engine.cc \ + ../hybrid/model/hybrid_model.cc \ + ../hybrid/model/hybrid_model_builder.cc \ + ../hybrid/model/node_item.cc \ + ../hybrid/model/graph_item.cc \ + ../hybrid/node_executor/aicore/aicore_node_executor.cc \ + ../hybrid/node_executor/aicore/aicore_op_task.cc \ + ../hybrid/node_executor/aicore/aicore_task_builder.cc \ + ../hybrid/node_executor/aicpu/aicpu_node_executor.cc \ + ../hybrid/node_executor/compiledsubgraph/known_node_executor.cc \ + ../hybrid/node_executor/ge_local/ge_local_node_executor.cc \ + ../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc \ + ../hybrid/node_executor/host_cpu/kernel_factory.cc \ + ../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc \ + ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ + ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ + ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ + ../hybrid/node_executor/controlop/control_op_executor.cc \ + ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ + ../hybrid/node_executor/rts/rts_node_executor.cc \ + ../hybrid/node_executor/node_executor.cc \ + ../hybrid/node_executor/task_context.cc \ + ../hybrid/hybrid_davinci_model.cc \ + ../ge_local_engine/engine/host_cpu_engine.cc \ + ../graph/common/omg_util.cc \ + ../graph/manager/host_mem_manager.cc \ + ../graph/build/memory/var_mem_assign_util.cc \ + ../host_kernels/transpose_kernel.cc \ + ../host_kernels/add_kernel.cc \ + ../host_kernels/broadcast_args_kernel.cc \ + ../host_kernels/broadcast_gradient_args_kernel.cc \ + ../host_kernels/cast_kernel.cc \ + ../host_kernels/concat_offset_kernel.cc \ + ../host_kernels/concat_v2_kernel.cc \ + ../host_kernels/dynamic_stitch_kernel.cc \ + ../host_kernels/identity_kernel.cc \ + ../host_kernels/empty_kernel.cc \ + ../host_kernels/expanddims_kernel.cc \ + ../host_kernels/fill_kernel.cc \ + ../host_kernels/floordiv_kernel.cc \ + ../host_kernels/floormod_kernel.cc \ + ../host_kernels/gather_v2_kernel.cc \ + ../host_kernels/greater_kernel.cc \ + ../host_kernels/kernel_utils.cc \ + ../host_kernels/maximum_kernel.cc \ + ../host_kernels/mul_kernel.cc \ + ../host_kernels/pack_kernel.cc \ + ../host_kernels/permute_kernel.cc \ + ../host_kernels/range_kernel.cc \ + ../host_kernels/rank_kernel.cc \ + ../host_kernels/reduce_prod_kernel.cc \ + ../host_kernels/reshape_kernel.cc \ + ../host_kernels/rsqrt_kernel.cc \ + ../host_kernels/shape_kernel.cc \ + ../host_kernels/shape_n_kernel.cc \ + ../host_kernels/size_kernel.cc \ + ../host_kernels/slice_d_kernel.cc \ + ../host_kernels/slice_kernel.cc \ + ../host_kernels/squeeze_kernel.cc \ + ../host_kernels/unsqueeze_kernel.cc \ + ../host_kernels/ssd_prior_box_kernel.cc \ + ../host_kernels/strided_slice_kernel.cc \ + ../host_kernels/sub_kernel.cc \ + ../host_kernels/transdata_kernel.cc \ + ../host_kernels/unpack_kernel.cc \ + ../graph/passes/pass_utils.cc \ + ../graph/common/bcast.cc \ + ../common/fp16_t.cc \ + ../common/formats/format_transfers/format_transfer_transpose.cc \ + ../common/formats/utils/formats_trans_utils.cc \ local_ge_executor_c_include := \ proto/insert_op.proto \ diff --git a/ge/ge_local_engine/CMakeLists.txt b/ge/ge_local_engine/CMakeLists.txt index 76590172..615a968f 100755 --- a/ge/ge_local_engine/CMakeLists.txt +++ b/ge/ge_local_engine/CMakeLists.txt @@ -195,7 +195,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES ) ############ libge_local_opskernel_builder.a ############ -add_library(ge_local_opskernel_builder_static SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) +add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) target_compile_options(ge_local_opskernel_builder_static PRIVATE -Werror diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index b14cbb3d..c836d4d6 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -95,8 +95,8 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { void HostCpuEngine::CloseSo() { for (auto handle : lib_handles_) { - if (dlclose(handle) != 0) { - GELOGW("failed to close handle, message: %s", dlerror()); + if (mmDlclose(handle) != 0) { + GELOGW("failed to close handle, message: %s", mmDlerror()); } } lib_handles_.clear(); @@ -322,13 +322,13 @@ Status HostCpuEngine::LoadLibs(std::vector &lib_paths) { Status HostCpuEngine::LoadLib(const std::string &lib_path) { GELOGI("To invoke dlopen on lib: %s", lib_path.c_str()); - auto handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL); + auto handle = mmDlopen(lib_path.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); if (handle == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), dlerror()); + GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), mmDlerror()); return INTERNAL_ERROR; } - auto initialize = (Status (*)(const HostCpuContext &))dlsym(handle, "Initialize"); + auto initialize = (Status (*)(const HostCpuContext &))mmDlsym(handle, "Initialize"); if (initialize != nullptr) { GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str()); if (initialize(HostCpuContext()) != SUCCESS) { diff --git a/ge/ge_local_engine/engine/host_cpu_engine.h b/ge/ge_local_engine/engine/host_cpu_engine.h index cc6b578c..0b99ecac 100644 --- a/ge/ge_local_engine/engine/host_cpu_engine.h +++ b/ge/ge_local_engine/engine/host_cpu_engine.h @@ -20,7 +20,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "graph/node.h" #include "graph/operator.h" -#include "register/register.h" +#include "external/../register/register.h" namespace ge { class HostCpuEngine { diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index 0fa1e1ee..19c0083c 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -30,6 +30,7 @@ #include "model/ge_model.h" #include "graph/ge_context.h" #include "opskernel_manager/ops_kernel_builder_manager.h" +#include "graph/utils/op_desc_utils.h" using domi::BuildMode; @@ -311,6 +312,53 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); } +static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, + const std::vector &in_anchors, const std::string &name) { + GE_CHECK_NOTNULL(out_anchor); + NodePtr in_node = out_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(in_node); + OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC); + OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) + .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) + .Build(); + (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false); + if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str()); + return FAILED; + } + return SUCCESS; +} + +static Status GenerateTaskForConstant(const std::shared_ptr &graph) { + for (auto &node : graph->GetDirectNode()) { + // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + auto op_type = op_desc->GetType(); + if (op_type == NETOUTPUT) { + for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { + const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); + NodePtr in_node = peer_out_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(in_node); + + std::string in_node_op_type = in_node->GetType(); + if (in_node_op_type == CONSTANT) { + GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); + std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; + if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) { + GELOGE(FAILED, "Insert memcpy between %s and %s failed.", in_node->GetName().c_str(), node->GetName().c_str()); + return FAILED; + } + } + } + } + } + return SUCCESS; +} + Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, @@ -332,6 +380,9 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { continue; } + + GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed."); + if (sub_graph->GetGraphUnknownFlag()) { // unknown shape build flow GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index 2eeecc0f..aa825a5d 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -274,13 +274,16 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da /// @param [in] stream stream to execute model on /// @param [in] async_mode is asynchronize mode. /// @param [in] input_data model input data +/// @param [in] input_desc description of model input data /// @param [out] output_data model output data +/// @param [out] output_desc description of model output data /// Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - OutputData &output_data) { + const std::vector &input_desc, OutputData &output_data, + std::vector &output_desc) { auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, output_data); + Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc); if (ret != SUCCESS) { GELOGE(ret, "Execute model failed, model_id:%u.", model_id); return ret; diff --git a/ge/graph/load/graph_loader.h b/ge/graph/load/graph_loader.h index b581f2fa..974af5c1 100755 --- a/ge/graph/load/graph_loader.h +++ b/ge/graph/load/graph_loader.h @@ -65,7 +65,8 @@ class GraphLoader { const std::vector &output_queue_ids); static Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - OutputData &output_data); + const std::vector &input_desc, OutputData &output_data, + std::vector &output_desc); static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index c660f797..37b1fb4f 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -117,7 +117,8 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptrGetWeight(); std::size_t weights_size = weights.GetSize(); GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE); - if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { - GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); + if ((weight_ptr != nullptr) && (weight_size < weights_size)) { + GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size); return FAILED; } - if ((weight_ptr != nullptr) && (weight_size < weights_size)) { - GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size); + weights_mem_base_ = static_cast(dev_ptr); + is_inner_weight_base_ = false; + + if (weights_size != 0) { + weights_mem_base_ = static_cast(weight_ptr); + is_inner_weight_base_ = false; + if (weight_ptr == nullptr) { + weights_mem_base_ = MallocWeightsMem(weights_size); + if (weights_mem_base_ == nullptr) { + GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); + return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; + } + is_inner_weight_base_ = true; + } + GELOGI("[IMAS]InitWeightMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, + weights_mem_base_, weights_size); + GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE)); + GELOGI("copy weights data to device"); + } + + runtime_param_.weight_base = weights_mem_base_; + return SUCCESS; +} + + +Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { + if (is_feature_map_mem_has_inited_) { + GELOGE(FAILED, "call InitFeatureMapMem more than once ."); + return FAILED; + } + is_feature_map_mem_has_inited_ = true; + + std::size_t data_size = TotalMemSize(); + std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; + + if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { + GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); return FAILED; } mem_base_ = static_cast(dev_ptr); p2p_mem_base_ = static_cast(dev_ptr); - weights_mem_base_ = static_cast(dev_ptr); is_inner_mem_base_ = false; - is_inner_weight_base_ = false; if (TotalMemSize() && mem_base_ == nullptr) { mem_base_ = MallocFeatureMapMem(data_size); @@ -298,12 +330,14 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size); return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; } - GEEVENT("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, + GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, mem_base_, data_size); - weights_mem_base_ = mem_base_; + if (!is_inner_weight_base_) { + weights_mem_base_ = mem_base_; + is_inner_weight_base_ = true; + } is_inner_mem_base_ = true; - is_inner_weight_base_ = true; } if (p2p_data_size != 0) { @@ -312,27 +346,11 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size); return GE_EXEC_ALLOC_P2P_MEM_FAILED; } - GELOGI("InitModelMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, + GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, p2p_mem_base_, p2p_data_size); is_inner_p2p_mem_base_ = true; } - if (weights_size != 0) { - weights_mem_base_ = static_cast(weight_ptr); - is_inner_weight_base_ = false; - if (weight_ptr == nullptr) { - weights_mem_base_ = MallocWeightsMem(weights_size); - if (weights_mem_base_ == nullptr) { - GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); - return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; - } - is_inner_weight_base_ = true; - } - GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, - weights_mem_base_, weights_size); - GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE)); - } - GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); runtime_param_.mem_base = mem_base_; runtime_param_.weight_base = weights_mem_base_; @@ -642,8 +660,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size GE_TIMESTAMP_START(InitModelMem); GELOGD("Known node is %d", known_node_); + GE_CHK_STATUS_RET_NOLOG(InitWeightMem(dev_ptr, weight_ptr, weight_size)); if (!known_node_) { - GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size)); + GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size)); data_inputer_ = new (std::nothrow) DataInputer(); GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr."); } @@ -1140,6 +1159,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS, GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); } + return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 893c3d49..650f19eb 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -584,7 +584,8 @@ class DavinciModel { Status SyncVarData(); - Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize); + Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size); + Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size); void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); @@ -850,7 +851,9 @@ class DavinciModel { Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node); Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc); - bool is_model_has_inited_; + bool is_weight_mem_has_inited_; + bool is_feature_map_mem_has_inited_; + uint32_t model_id_; uint32_t runtime_model_id_; string name_; diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 080ca889..6f20f63d 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -31,6 +31,7 @@ #include "model/ge_root_model.h" #include "graph/common/local_context.h" #include "common/formats/utils/formats_trans_utils.h" +#include "hybrid/hybrid_davinci_model.h" namespace ge { thread_local uint32_t device_count = 0; @@ -204,6 +205,13 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { std::lock_guard lock(map_mutex_); + auto hybrid_davinci_model = hybrid_model_map_.find(model_id); + if (hybrid_davinci_model != hybrid_model_map_.end()) { + uint64_t session_id = hybrid_davinci_model->second->GetSessionId(); + DestroyAicpuSession(session_id); + return SUCCESS; + } + auto it = model_map_.find(model_id); if (it == model_map_.end()) { GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); @@ -925,6 +933,12 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector &output_desc, std::vector &inputFormats, std::vector &outputFormats, bool new_model_desc) { + std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + hybrid_davinci_model->SetModelDescVersion(new_model_desc); + return hybrid_davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats); + } + std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); @@ -943,6 +957,11 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector> &batch_info, int32_t &dynamic_type) { + std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + return hybrid_davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type); + } + std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetDynamicBatchInfo failed, Invalid model id %u!", model_id); @@ -975,6 +994,12 @@ Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector &user_input_shape_order) { + auto hybrid_davinci_model = GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + hybrid_davinci_model->GetUserDesignateShapeOrder(user_input_shape_order); + return SUCCESS; + } + auto davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id) @@ -990,6 +1015,12 @@ Status ModelManager::GetCurShape(const uint32_t model_id, std::vector & } Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info) { + std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + hybrid_davinci_model->GetModelAttr(dynamic_output_shape_info); + return SUCCESS; + } + std::shared_ptr davinci_model = GetModel(model_id); GE_CHECK_NOTNULL(davinci_model); davinci_model->GetModelAttr(dynamic_output_shape_info); @@ -1201,10 +1232,25 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d /// @param [in] stream model stream /// @param [in] async_mode is asynchronize mode. /// @param [in] input_data input data +/// @param [in] input_desc description of input data /// @param [out] output_data output data +/// @param [out] output_desc description of output data /// Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - OutputData &output_data) { + const std::vector &input_desc, OutputData &output_data, + std::vector &output_desc) { + std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); + if (hybrid_davinci_model != nullptr) { + auto inputs = input_data.blobs; + auto outputs = output_data.blobs; + + Status status = hybrid_davinci_model->Execute(inputs, input_desc, outputs, output_desc, stream); + if (status == SUCCESS) { + GELOGI("Execute model %u success.", model_id); + } + return status; + } + std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id); diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 9821a4ab..e3780d5b 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -148,10 +148,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// @param [in] stream model stream /// @param [in] async_mode is asynchronize mode. /// @param [in] input_data model input data + /// @param [in] input_desc description of model input data /// @param [out] output_data model output data + /// @param [out] output_desc description of model output data /// ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, - OutputData &output_data); + const std::vector &input_desc, OutputData &output_data, + std::vector &output_desc); ge::Status SyncExecuteModel(uint32_t model_id, const std::vector &inputs, std::vector &outputs); diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 87fac994..95f13b6f 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -26,6 +26,7 @@ #include #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "framework/common/types.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" @@ -72,7 +73,7 @@ Status DynamicShapePartitioner::Partition() { } REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true), "Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str()); - + REQUIRE_SUCCESS(CtrlEdgeTransfer(), "Failed do ctrl edge transfer!"); DumpGraph("_Before_DSP"); auto status = PartitionImpl(); GELOGD("%s.", DebugString().c_str()); @@ -86,6 +87,50 @@ Status DynamicShapePartitioner::Partition() { return status; } +Status DynamicShapePartitioner::CtrlEdgeTransfer() { + GELOGD("Do ctrl edge transfer start!"); + GE_CHECK_NOTNULL(root_graph_); + + bool is_dynamic_shape = false; + (void)AttrUtils::GetBool(root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); + if (!is_dynamic_shape) { + return SUCCESS; + } + for (auto &subgraph : root_graph_->GetAllSubgraphs()) { + for (ge::NodePtr &n : subgraph->GetDirectNode()) { + auto op_desc = n->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + auto op_type = op_desc->GetType(); + if (op_type == CONSTANT || op_type == CONSTANTOP) { + if (n->GetInAllNodes().empty()) { + GELOGD("[CtrlEdgeTransferPass] node [%s] in nodes is empty", n->GetName().c_str()); + continue; + } + + GELOGD("start to tranfer ctrl edge for const node [%s]", n->GetName().c_str()); + + for (auto &in_control_node : n->GetInControlNodes()) { + GE_CHECK_NOTNULL(in_control_node); + GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(), + n->GetInControlAnchor()), "remove edge failed"); + for (auto &out_node : n->GetOutNodes()) { + if (out_node == nullptr) { + continue; + } + GE_CHK_STATUS_RET(ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(), + out_node->GetInControlAnchor()), "add edge failed."); + } + } + } + } + } + + GELOGD("Do ctrl edge transfer end!"); + return SUCCESS; +} + Status DynamicShapePartitioner::PartitionImpl() { REQUIRE_SUCCESS(root_graph_->TopologicalSorting(), "Graph topological sort failed."); REQUIRE_SUCCESS(InitClusters(), "Failed init cluster nodes."); diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h index b0477ae8..9772615e 100644 --- a/ge/graph/partition/dynamic_shape_partition.h +++ b/ge/graph/partition/dynamic_shape_partition.h @@ -151,6 +151,7 @@ class DynamicShapePartitioner { Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow); Status IsUnknownShapeNode(ge::NodePtr node, bool &is_unknow); bool IsUnknownShapeTensor(const ge::GeTensorDesc &tensor); + Status CtrlEdgeTransfer(); ge::ComputeGraphPtr root_graph_; // The original graph to partition std::unordered_map> node_2_cluster_; // Record nodes and the cluster it belongs to // topological sorted clusters, this field will change with the splitting. diff --git a/ge/graph/passes/pass_utils.cc b/ge/graph/passes/pass_utils.cc index 5359ff63..3adfbde3 100644 --- a/ge/graph/passes/pass_utils.cc +++ b/ge/graph/passes/pass_utils.cc @@ -37,10 +37,6 @@ #include "graph/utils/type_utils.h" namespace ge { -namespace { -const uint32_t kShapeDimSize = 1; -const uint32_t DIM_SIZE_TWO = 2; -} // namespace Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector &data, std::vector &v_output, const bool scalar_output) { diff --git a/ge/graph/passes/transop_breadth_fusion_pass.cc b/ge/graph/passes/transop_breadth_fusion_pass.cc index 21fb1eaf..689510f0 100644 --- a/ge/graph/passes/transop_breadth_fusion_pass.cc +++ b/ge/graph/passes/transop_breadth_fusion_pass.cc @@ -63,7 +63,7 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, GELOGE(FAILED, "node is null"); return ""); if (node->GetType() == CAST) { trans_data_type = true; - } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED) { + } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED || node->GetType() == EXPANDDIMS) { trans_format = true; trans_shape = true; } else if (node->GetType() == TRANSDATA) { diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt index 02b5f996..97b5a0f5 100644 --- a/ge/host_cpu_engine/CMakeLists.txt +++ b/ge/host_cpu_engine/CMakeLists.txt @@ -8,7 +8,7 @@ set(SRC_LIST "engine/host_cpu_engine.cc" "ops_kernel_store/host_cpu_ops_kernel_info.cc" "ops_kernel_store/op/op_factory.cc" - "ops_kernel_store/op/host_op.cc" + "ops_kernel_store/op/host_op.cc" ) set(CPU_OPS_KERNEL_LIST @@ -98,7 +98,7 @@ target_link_libraries(atc_host_cpu_engine PRIVATE set_target_properties(atc_host_cpu_engine PROPERTIES OUTPUT_NAME host_cpu_engine - LIBRARY_OUTPUT_DIRECTORY atclib + LIBRARY_OUTPUT_DIRECTORY atclib ) ############ libhost_cpu_opskernel_builder.so ############ @@ -185,7 +185,7 @@ set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES ) ############ libhost_cpu_opskernel_builder.a ############ -add_library(host_cpu_opskernel_builder_static SHARED ${CPU_OPS_KERNEL_LIST}) +add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST}) target_compile_options(host_cpu_opskernel_builder_static PRIVATE -Werror diff --git a/ge/host_kernels/floordiv_kernel.cc b/ge/host_kernels/floordiv_kernel.cc index e254af09..df381212 100644 --- a/ge/host_kernels/floordiv_kernel.cc +++ b/ge/host_kernels/floordiv_kernel.cc @@ -112,8 +112,8 @@ void FloorDivKernel::ShapeCal(const std::vector &input, Ge template T FloorDivKernel::DivCal(const T &x_i, const T &y_i) { if ((x_i < static_cast(0)) != (y_i < static_cast(0))) { - T abs_x_i = std::abs(x_i); - T abs_y_i = std::abs(y_i); + T abs_x_i = x_i < 0 ? -x_i : x_i; + T abs_y_i = y_i < 0 ? -y_i : y_i; return static_cast(static_cast(-(abs_x_i + abs_y_i - 1) / abs_y_i)); } else { return static_cast(static_cast(x_i / y_i)); diff --git a/ge/host_kernels/floordiv_kernel.h b/ge/host_kernels/floordiv_kernel.h index d3dc3ff7..b8f6dd12 100755 --- a/ge/host_kernels/floordiv_kernel.h +++ b/ge/host_kernels/floordiv_kernel.h @@ -40,10 +40,6 @@ class FloorDivKernel : public Kernel { template Status DataCal(const std::vector &input, ge::GeTensorPtr output_ptr); Status ComputeByDataType(DataType data_type, const std::vector &input, GeTensorPtr output_ptr); - - int64_t axis_dim_; - int64_t head_dim_; - int64_t end_dim_; }; } // namespace ge diff --git a/ge/host_kernels/ssd_prior_box_kernel.cc b/ge/host_kernels/ssd_prior_box_kernel.cc index b93a4047..57af4026 100644 --- a/ge/host_kernels/ssd_prior_box_kernel.cc +++ b/ge/host_kernels/ssd_prior_box_kernel.cc @@ -187,7 +187,7 @@ Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uin return PARAM_INVALID; } - uint tmp_value = aspect_ratios_size * min_sizes_size; + uint32_t tmp_value = aspect_ratios_size * min_sizes_size; if (ge::CheckUint32AddOverflow(tmp_value, max_sizes_size) != SUCCESS) { GELOGW("Failed to get list param."); return PARAM_INVALID; @@ -199,7 +199,7 @@ Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uin return PARAM_INVALID; } num_priors = static_cast(tmp_value); - + if (ge::CheckIntMulOverflow(layer_width, layer_height) != SUCCESS) { GELOGW("Failed to get list param."); return PARAM_INVALID; @@ -288,7 +288,7 @@ std::unique_ptr SsdPriorboxKernel::BoundaryCalulate(int dim_size, int l } } - return std::move(output_data); + return output_data; } Status SsdPriorboxKernel::Compute(const NodePtr &node, std::vector &v_output) { diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index 0910d2c7..0fa5a5d7 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -77,7 +77,7 @@ do { \ RECORD_PROFILING_EVENT((context), HybridProfiler::EXECUTION, fmt, "Execution", name, ##__VA_ARGS__) #define RECORD_CALLBACK_EVENT(context, name, fmt, ...) \ - RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACK, fmt, "Callback", name, ##__VA_ARGS__) + RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACKS, fmt, "Callback", name, ##__VA_ARGS__) } // namespace hybrid } // namespace ge #endif // GE_HYBRID_EXECUTOR_HYBRID_EXECUTION_CONTEXT_H_ diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 468a7014..91996ab3 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -353,6 +353,44 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a return SUCCESS; } +Status HybridModelAsyncExecutor::Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc) { + GELOGI("Start to execute model."); + + HybridModelExecutor::ExecuteArgs args; + args.inputs.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); ++i) { + TensorValue tensor_value(inputs[i].data, inputs[i].length); + args.inputs[i] = tensor_value; + } + GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model."); + for (const auto &output_tensor_desc : args.output_desc) { + output_desc.emplace_back(*output_tensor_desc); + } + + for (size_t i = 0; i < args.outputs.size(); ++i) { + int64_t output_real_size = 0; + ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc[i], output_real_size); + if (graph_status != GRAPH_SUCCESS) { + GELOGE(FAILED, "Get tensor size in bytes failed."); + return FAILED; + } + if (output_real_size > 0) { + if (outputs[i].length < static_cast(output_real_size)) { + GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by user should be greater than or equal to the real size of output[%ld]", + i, outputs[i].length, output_real_size); + return FAILED; + } + GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE)); + } + outputs[i].length = output_real_size; + } + + return SUCCESS; +} + Status HybridModelAsyncExecutor::Execute(const vector &inputs, vector &outputs) { GELOGD("Start to execute model."); // prepare inputs diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index 8de2beb6..21833b0b 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -35,6 +35,11 @@ class HybridModelAsyncExecutor { Status Init(); + Status Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc); + Status Execute(const vector &inputs, vector &outputs); Status Start(const std::shared_ptr &listener); diff --git a/ge/hybrid/executor/hybrid_profiler.h b/ge/hybrid/executor/hybrid_profiler.h index f6027a0b..94a042e4 100644 --- a/ge/hybrid/executor/hybrid_profiler.h +++ b/ge/hybrid/executor/hybrid_profiler.h @@ -33,7 +33,7 @@ class HybridProfiler { SHAPE_INFERENCE, COMPILE, EXECUTION, - CALLBACK + CALLBACKS }; struct Event { diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 48b2ed72..04f1ee4b 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -27,7 +27,7 @@ namespace ge { namespace hybrid { class NodeTask; -class GraphExecutionContext; +struct GraphExecutionContext; class SubgraphContext; class ShapeFuture { diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc index b6f5bb84..7009331c 100755 --- a/ge/hybrid/hybrid_davinci_model.cc +++ b/ge/hybrid/hybrid_davinci_model.cc @@ -38,6 +38,14 @@ class HybridDavinciModel::Impl { return SUCCESS; } + Status Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc, + rtStream_t stream) { + return executor_.Execute(inputs, input_desc, outputs, output_desc); + } + Status Execute(const vector &inputs, vector &outputs) { return executor_.Execute(inputs, outputs); } @@ -68,6 +76,33 @@ class HybridDavinciModel::Impl { executor_.SetDeviceId(device_id); } + uint64_t GetSessionId() { + return model_.GetSessionId(); + } + + Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { + return model_.GetDynamicBatchInfo(batch_info, dynamic_type); + } + + void GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { + model_.GetUserDesignateShapeOrder(user_input_shape_order); + } + + void GetModelAttr(std::vector &dynamic_output_shape_info) { + model_.GetModelAttr(dynamic_output_shape_info); + } + + Status GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &output_formats) { + return model_.GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats); + } + + void SetModelDescVersion(bool is_new_model_desc) { + model_.SetModelDescVersion(is_new_model_desc); + } + private: std::shared_ptr listener_; HybridModel model_; @@ -95,6 +130,14 @@ Status HybridDavinciModel::Init() { return impl_->Init(); } +Status HybridDavinciModel::Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc, rtStream_t stream) { + GE_CHECK_NOTNULL(impl_); + return impl_->Execute(inputs, input_desc, outputs, output_desc, stream); +} + Status HybridDavinciModel::Execute(const vector &inputs, vector &outputs) { GE_CHECK_NOTNULL(impl_); return impl_->Execute(inputs, outputs); @@ -132,5 +175,41 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) { impl_->SetDeviceId(device_id); } } + +Status HybridDavinciModel::GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { + GE_CHECK_NOTNULL(impl_); + return impl_->GetDynamicBatchInfo(batch_info, dynamic_type); +} + +void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { + if (impl_ != nullptr) { + impl_->GetUserDesignateShapeOrder(user_input_shape_order); + } +} + +void HybridDavinciModel::GetModelAttr(std::vector &dynamic_output_shape_info) { + if (impl_ != nullptr) { + impl_->GetModelAttr(dynamic_output_shape_info); + } +} + +Status HybridDavinciModel::GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &output_formats) { + GE_CHECK_NOTNULL(impl_); + return impl_->GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats); +} + +void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) { + if (impl_ != nullptr) { + impl_->SetModelDescVersion(is_new_model_desc); + } +} + +uint64_t HybridDavinciModel::GetSessionId() { + GE_CHECK_NOTNULL(impl_); + return impl_->GetSessionId(); +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h index 00a48c1e..5349390c 100644 --- a/ge/hybrid/hybrid_davinci_model.h +++ b/ge/hybrid/hybrid_davinci_model.h @@ -37,6 +37,12 @@ class HybridDavinciModel { Status Init(); + Status Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc, + rtStream_t stream); + Status Execute(const vector &inputs, vector &outputs); Status ModelRunStart(); @@ -51,6 +57,21 @@ class HybridDavinciModel { void SetDeviceId(uint32_t device_id); + uint64_t GetSessionId(); + + Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type); + + void GetUserDesignateShapeOrder(std::vector &user_input_shape_order); + + void GetModelAttr(std::vector &dynamic_output_shape_info); + + Status GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &output_formats); + + void SetModelDescVersion(bool is_new_model_desc); + private: HybridDavinciModel() = default; class Impl; diff --git a/ge/hybrid/hybrid_davinci_model_stub.cc b/ge/hybrid/hybrid_davinci_model_stub.cc index b95b9efc..366845c5 100644 --- a/ge/hybrid/hybrid_davinci_model_stub.cc +++ b/ge/hybrid/hybrid_davinci_model_stub.cc @@ -28,6 +28,14 @@ Status HybridDavinciModel::Init() { return UNSUPPORTED; } +Status HybridDavinciModel::Execute(const std::vector &inputs, + const std::vector &input_desc, + std::vector &outputs, + std::vector &output_desc, + rtStream_t stream) { + return UNSUPPORTED; +} + Status HybridDavinciModel::Execute(const vector &inputs, vector &outputs) { return UNSUPPORTED; } @@ -52,5 +60,29 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) { void HybridDavinciModel::SetDeviceId(uint32_t device_id) { } + +uint64_t HybridDavinciModel::GetSessionId() { + return 0; +} + +Status HybridDavinciModel::GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { + return UNSUPPORTED; +} + +void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { +} + +void HybridDavinciModel::GetModelAttr(std::vector &dynamic_output_shape_info) { +} + +Status HybridDavinciModel::GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &output_formats) { + return UNSUPPORTED; +} + +void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) { +} } // namespace hybrid } // namespace ge \ No newline at end of file diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index 59c7be9a..c319b06b 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -21,12 +21,18 @@ #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" #include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/model/hybrid_model_builder.h" #include "hybrid/node_executor/node_executor.h" +#include "common/op/ge_op_utils.h" namespace ge { namespace hybrid { +namespace { +const int64_t kMemSizeUnknownShape = -1; // Unknown shape mem size +} + HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) { } @@ -128,7 +134,187 @@ const GraphItem *HybridModel::GetSubgraphItem(const ComputeGraphPtr &subgraph) c } const string &HybridModel::GetModelName() const { - return model_name_; + return model_name_; +} + +Status HybridModel::GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) { + // dynamic shape do not need dynamic batch + batch_info = {}; + dynamic_type = -1; + return SUCCESS; +} + +void HybridModel::GetUserDesignateShapeOrder(std::vector &user_input_shape_order) { + // dynamic shape do not need dynamic batch + user_input_shape_order = {}; +} + +void HybridModel::GetModelAttr(std::vector &dynamic_output_shape_info) { + dynamic_output_shape_info = {}; +} + +Status HybridModel::GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &output_formats) { + auto node_item_list = root_graph_item_->GetInputNodes(); + if (node_item_list.empty()) { + GELOGE(FAILED, "node item list is empty!"); + return FAILED; + } + + GE_CHECK_NOTNULL(node_item_list[0]->node); + GE_CHECK_NOTNULL(node_item_list[0]->node->GetOpDesc()); + if (node_item_list[0]->node->GetOpDesc()->GetInputsSize() != 1) { + GELOGE(FAILED, "input size of op is not 1!"); + return FAILED; + } + + GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); + GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed"); + + return SUCCESS; +} + +void HybridModel::SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, std::vector> &shape_ranges, + InputOutputDescInfo &input) { + for (auto model_input_dim : model_input_dims) { + input.shape_info.dims.push_back(model_input_dim); + } + input.shape_info.shape_ranges = shape_ranges; + return; +} + +void HybridModel::CreateInputDimsInfo(const OpDescPtr &op_desc, InputOutputDescInfo &input) { + std::vector> shape_ranges; + if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) { + // When static aipp is set, need to get the model input dims which processed by aipp + vector model_input_dims; + (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims); + SetInputDimsAndShapeRangesInfo(model_input_dims, shape_ranges, input); + return; + } + // judge if this data is linked dynamic aipp first, multiply batch has been considered + if (op_desc->HasAttr("_dynamic_aipp_input_dims")) { + vector dynamic_aipp_input_dims; + (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims); + SetInputDimsAndShapeRangesInfo(dynamic_aipp_input_dims, shape_ranges, input); + return; + } else { + vector input_dims = op_desc->GetInputDescPtr(0)->GetShape().GetDims(); + op_desc->GetInputDescPtr(0)->GetShapeRange(shape_ranges); + SetInputDimsAndShapeRangesInfo(input_dims, shape_ranges, input); + return; + } +} + +Status HybridModel::GetInputDescInfo(vector &input_desc, std::vector &formats) { + auto node_item_list = root_graph_item_->GetInputNodes(); + for (auto &node_item : node_item_list) { + InputOutputDescInfo input; + + GE_CHECK_NOTNULL(node_item->node); + auto op_desc = node_item->node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); + + Format format = op_desc->GetInputDescPtr(0)->GetFormat(); + input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); + input.name = op_desc->GetName(); + + int64_t input_size = 0; + GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); + + // support dynamic shape + if (input_size < 0) { + GELOGD("dynamic shape scene, input size is unknown. " + "format=%d, data_type=%d, input_size=%ld", + format, input.data_type, input_size); + input_size = kMemSizeUnknownShape; // -1 + } + + // not support dynamic shape input for now, so input_size here will be not less than zero. + input.size = input_size; + + CreateInputDimsInfo(op_desc, input); + + formats.push_back(format); + input_desc.push_back(input); + } + is_new_model_desc_ = false; + return SUCCESS; +} + +void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output_desc_info, uint32_t &format_result) { + GE_IF_BOOL_EXEC(output_desc == nullptr, GELOGE(FAILED, "output desc ptr is nullptr"); return ); + Format format = output_desc->GetFormat(); + GeShape shape = output_desc->GetShape(); + std::vector> shape_ranges; + output_desc->GetShapeRange(shape_ranges); + DataType data_type = output_desc->GetDataType(); + format_result = format; + if (format == FORMAT_FRACTAL_Z) { // FraczToHWCK + int64_t k = shape.GetDim(0); // 0: first dim + int64_t c = shape.GetDim(1); // 1: second dim + int64_t h = shape.GetDim(2); // 2: third dim + int64_t w = shape.GetDim(3); // 3: forth dim + output_desc_info.shape_info.dims.push_back(h); + output_desc_info.shape_info.dims.push_back(w); + output_desc_info.shape_info.dims.push_back(c); + output_desc_info.shape_info.dims.push_back(k); + if (shape_ranges.size() == 4) { // 4 dims + output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[2]); // h:2 + output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[3]); // w:3 + output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[1]); // c:1 + output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[0]); // k:0 + } + format_result = FORMAT_HWCN; + } else { + for (size_t j = 0; j < shape.GetDimNum(); j++) { + output_desc_info.shape_info.dims.push_back(shape.GetDim(j)); + } + output_desc_info.shape_info.shape_ranges = shape_ranges; + } + int64_t tensor_size = 0; + (void)TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); + output_desc_info.size = static_cast(tensor_size); + output_desc_info.data_type = output_desc->GetDataType(); +} + +Status HybridModel::GetOutputDescInfo(vector &output_desc, std::vector &formats) { + std::vector output_desc_list; + GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed"); // output_desc_list contains vaild input desc + + vector out_node_names; + (void)ge::AttrUtils::GetListStr(ge_root_model_->GetRootGraph(), ATTR_MODEL_OUT_NODES_NAME, out_node_names); + + GE_CHECK_NOTNULL(root_graph_item_->GetOutputNode()); + auto op_desc = root_graph_item_->GetOutputNode()->op_desc; + GE_CHECK_NOTNULL(op_desc); + + auto out_size = static_cast(op_desc->GetInputsSize()); + GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(), FAILED, "output size[%u] not match output_desc_list size[%zu]", out_size, output_desc_list.size()); + + for (uint32_t index = 0; index < out_size; ++index) { + string output_name; + std::vector src_name = op_desc->GetSrcName(); + std::vector src_index = op_desc->GetSrcIndex(); + if (out_size == out_node_names.size()) { + bool contains_colon = out_node_names[index].find(":") != std::string::npos; + output_name = contains_colon ? out_node_names[index] : out_node_names[index] + ":" + std::to_string(src_index[index]); + } else { + output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); + } + + InputOutputDescInfo output_desc_info; + output_desc_info.name = output_name; + + uint32_t format_result; + CreateOutput(output_desc_list[index], output_desc_info, format_result); + output_desc.push_back(output_desc_info); + formats.push_back(format_result); + } + return SUCCESS; } } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 11311968..1bc08053 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -83,6 +83,30 @@ class HybridModel { const string &GetModelName() const; + Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type); + + void GetUserDesignateShapeOrder(std::vector &user_input_shape_order); + + void GetModelAttr(std::vector &dynamic_output_shape_info); + + Status GetInputOutputDescInfo(vector &input_desc, + vector &output_desc, + std::vector &input_formats, + std::vector &outputFormats); + + Status GetInputDescInfo(vector &input_desc, std::vector &formats); + + void CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output, uint32_t &format_result); + + Status GetOutputDescInfo(vector &output_desc, std::vector &formats); + + void CreateInputDimsInfo(const OpDescPtr &op_desc, InputOutputDescInfo &input); + + void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } + + void SetInputDimsAndShapeRangesInfo(const vector &model_input_dims, std::vector> &shape_ranges, + InputOutputDescInfo &input); + private: friend class HybridModelBuilder; friend class HybridModelAsyncExecutor; @@ -101,6 +125,8 @@ class HybridModel { std::map> subgraph_items_; std::map> node_items_; + bool is_new_model_desc_ = false; // support aipp + // runtime fields uint32_t device_id_ = 0; uint32_t model_id_ = 0; diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index cd4c0a83..d519c35b 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -27,16 +27,41 @@ #include "graph/utils/graph_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/node_executor/node_executor.h" +#include "framework/common/debug/ge_log.h" +#include "graph/utils/attr_utils.h" namespace ge { namespace hybrid { namespace { const uint32_t kSubgraphIndex = 0U; const uint32_t kVarOutputIndex = 0U; -const uint32_t kAlignment = 32; const int kBytes = 8; const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; +Status SetOutputNameAttr(ComputeGraph &graph) { + vector output_names; + for (const auto &node : graph.GetDirectNode()) { + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + auto op_type = op_desc->GetType(); + if (op_type == NETOUTPUT) { + for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { + const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); + NodePtr in_node = peer_out_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(in_node); + output_names.push_back(in_node->GetName()); + } + } + } + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&graph, ATTR_MODEL_OUT_NODES_NAME, output_names), + GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed."); + return FAILED); + return SUCCESS; +} + int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { int64_t var_size = 0; auto data_type = desc.GetDataType(); @@ -939,6 +964,10 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr Status HybridModelBuilder::IndexTaskDefs() { const auto &root_graph = ge_root_model_->GetRootGraph(); + if (SetOutputNameAttr(*root_graph) != SUCCESS) { + GELOGW("Set output name attr failed."); + } + for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { auto &name = it.first; auto &ge_model = it.second; diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 998afd02..80ea579b 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -19,6 +19,7 @@ #include "framework/common/debug/log.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/node_executor/aicore/aicore_task_builder.h" +#include "graph/load/new_model_manager/tbe_handle_store.h" using optiling::OpRunInfo; @@ -36,6 +37,58 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) return SUCCESS; } +Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { + auto op_desc_ptr = std::make_shared(op_desc); + GE_CHECK_NOTNULL(op_desc_ptr); + auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); + if (tbe_kernel == nullptr) { + GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str()); + return INTERNAL_ERROR; + } + TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); + rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); + if (rt_ret != RT_ERROR_NONE) { + void *bin_handle = nullptr; + if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { + GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); + rtDevBinary_t binary; + std::string json_string; + GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), + GELOGI("Get original type of session_graph_id.")); + if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; + } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF; + } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; + } else { + GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); + return PARAM_INVALID; + } + binary.version = 0; + binary.data = tbe_kernel->GetBinData(); + binary.length = tbe_kernel->GetBinDataSize(); + GELOGI("TBE: binary.length: %lu", binary.length); + GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); + std::string meta_data; + GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data), + GELOGI("Get original type of json_string")); + GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); + GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); + kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); + } else { + GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str()); + kernel_store.ReferTBEHandle(stub_name_.c_str()); + } + std::string kernel_name; + GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name), + GELOGI("Get original type of kernel_name")); + GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); + GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0)); + } + return SUCCESS; +} + Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET(ValidateTaskDef(task_def), "[%s] Failed to validate task def: [%s]", @@ -45,6 +98,9 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef const domi::KernelDef &kernel_def = task_def.kernel(); const domi::KernelContext &context = kernel_def.context(); stub_name_ = kernel_def.stub_func(); + + GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); + GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); args_size_ = kernel_def.args_size(); block_dim_ = kernel_def.block_dim(); diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 0447ade7..5818f384 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -62,6 +62,7 @@ class AiCoreOpTask { static Status ValidateTaskDef(const domi::TaskDef &task_def); Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); Status InitTilingInfo(const OpDesc &op_desc); + Status RegisterTbeHandle(const OpDesc &op_desc); std::string stub_name_; void *stub_func_ = nullptr; diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h index bf948349..b6dfd82b 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h @@ -26,7 +26,7 @@ namespace hybrid { class AiCoreTaskCompiler : public TaskCompiler { public: AiCoreTaskCompiler() = default; - ~AiCoreTaskCompiler() = default; + ~AiCoreTaskCompiler() override = default; Status CompileOp(const NodePtr &node, std::vector &tasks) override; Status Initialize() override; diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index b984cc86..1205b190 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -37,6 +37,8 @@ class AicpuNodeTaskBase : public NodeTask { ~AicpuNodeTaskBase() override = default; + using NodeTask::Init; + virtual Status Init(const HybridModel &model) = 0; Status UpdateArgs(TaskContext &context) override; diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.h b/ge/hybrid/node_executor/controlop/control_op_executor.h index 7520afd1..3becfaaa 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.h +++ b/ge/hybrid/node_executor/controlop/control_op_executor.h @@ -25,6 +25,7 @@ namespace ge { namespace hybrid { class ControlOpNodeTask : public NodeTask { public: + using NodeTask::Init; virtual Status Init(const NodePtr &node, const HybridModel &model) = 0; Status UpdateArgs(TaskContext &context) override; diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc index 7a83641d..a52e5670 100755 --- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc +++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc @@ -68,7 +68,7 @@ Status RefInputTask::RefOneByOne(TaskContext &context) { node_name_.c_str(), node_type_.c_str(), output_num, input_num); return INTERNAL_ERROR; } - for (uint32_t out_index = 0; out_index < output_num; ++out_index) { + for (uint32_t out_index = 0; out_index < static_cast(output_num); ++out_index) { auto input = context.GetInput(out_index); GE_CHECK_NOTNULL(input); GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 3bf71013..01fd391d 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -20,7 +20,6 @@ #include "hybrid/node_executor/host_cpu/kernel_factory.h" namespace { -const size_t kAssignInputNum = 2; const size_t kAssignRefInputIndex = 0; const size_t kAssignValueInputIndex = 1; const size_t kAssignRefOutputIndex = 0; diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index e577f09b..95e50c31 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -34,7 +34,6 @@ const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel"; const char *const kEngineNameHccl = "ops_kernel_info_hccl"; const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; -const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; } Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); diff --git a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h index 9ea544a1..73873002 100644 --- a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h +++ b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h @@ -41,7 +41,6 @@ class PartitionedCallNodeTask : public NodeTask { const GraphItem *graph_item_; std::unique_ptr subgraph_executor_; - GraphExecutionContext *context_ = nullptr; }; class PartitionedCallNodeExecutor : public NodeExecutor { diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 2cff0536..0549a1dc 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -29,7 +29,7 @@ namespace ge { namespace hybrid { -class GraphExecutionContext; +struct GraphExecutionContext; class SubgraphContext; class TaskContext { diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 17dbf928..5a73126f 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -234,6 +234,22 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data, ge::RunModelData &output_data, bool async_mode = false); + /// + /// @ingroup ge + /// @brief Synchronous execution of offline model(Do not create thread) + /// @param [in] uint32_t model_id: Model ID to execute + /// @param [in] void* stream: stream to execute + /// @param [in] bool async_mode: is asynchronize mode. + /// @param [in] const domi::InputData *input_data: Model input data + /// @param [in] const std::vector &input_desc: description of model input data + /// @param [out] domi::OutputData *output_data: Model output data + /// @param [out] std::vector &output_desc: description of model output data + /// @return SUCCESS handle successfully / others handle failed + /// + ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, + const std::vector &input_desc, ge::RunModelData &run_output_data, + std::vector &output_desc, bool async_mode = false); + /// /// @ingroup ge /// @brief Get weight memory size from model file From c588b7029c088c0c488c23dbdea47f81a2e4fa0e Mon Sep 17 00:00:00 2001 From: taoxudonghaha Date: Mon, 7 Dec 2020 20:18:32 +0800 Subject: [PATCH 7/9] modify fwk_atc.bin --- ge/offline/CMakeLists.txt | 4 ++-- ge/offline/module.mk | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index b3a0d53c..2f9195bc 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -183,11 +183,11 @@ target_link_libraries(fwk_atc.bin PRIVATE c_sec graph error_manager - ge_compiler + ge_runner parser_common gflags json - runtime_compile + runtime slog static_mmpa -lrt diff --git a/ge/offline/module.mk b/ge/offline/module.mk index c14be50f..8018266a 100755 --- a/ge/offline/module.mk +++ b/ge/offline/module.mk @@ -149,8 +149,8 @@ LOCAL_SHARED_LIBRARIES := \ libgraph \ libregister \ liberror_manager \ - libge_compiler \ - libruntime_compile \ + libge_runner \ + libruntime \ libparser_common \ liberror_manager \ From 7fa1ca9237d095b57f05ac39870fcbd051d2460e Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Mon, 7 Dec 2020 20:39:54 +0800 Subject: [PATCH 8/9] unify mutex for model_aicpu_kernel_ in different func --- ge/graph/load/new_model_manager/model_manager.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 080ca889..74c37a1b 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -216,7 +216,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); - std::lock_guard lock(sess_ids_mutex_); + std::lock_guard lock(map_mutex_); std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); @@ -229,7 +229,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_ } ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { - std::lock_guard lock(sess_ids_mutex_); + std::lock_guard lock(map_mutex_); std::vector v_aicpu_kernel; std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { From 8291221f64bfebc1c572a1410899830594de9d16 Mon Sep 17 00:00:00 2001 From: weiyang Date: Sat, 5 Dec 2020 15:07:17 +0800 Subject: [PATCH 9/9] fix cust aicpu --- ge/graph/load/new_model_manager/model_manager.cc | 14 ++++++++++---- ge/graph/load/new_model_manager/model_manager.h | 2 +- .../task_info/kernel_task_info.cc | 4 +++- .../task_info/super_kernel/super_kernel.cc | 2 +- .../node_executor/aicpu/aicpu_node_executor.cc | 8 ++++++-- ge/single_op/task/aicpu_kernel_task_builder.cc | 8 ++++++-- 6 files changed, 27 insertions(+), 11 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 74c37a1b..5d9b6e65 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1243,8 +1243,8 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { return SUCCESS; } -Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name) { - GELOGI("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); +Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded) { + GELOGD("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); if (aicpu_kernel == nullptr) { @@ -1267,18 +1267,24 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_ std::map new_so_name; new_so_name.insert({so_name, aicpu_kernel}); cust_aicpu_so_[resource_id] = new_so_name; - GELOGI("LoadCustAicpuSo new aicpu so resource id %lu", resource_id); + loaded = false; + GELOGD("LoadCustAicpuSo new aicpu so name %s, resource id %lu", so_name.c_str(), resource_id); return SUCCESS; } auto it_so_name = it->second.find(so_name); if (it_so_name == it->second.end()) { it->second.insert({so_name, aicpu_kernel}); - GELOGI("LoadCustAicpuSo add aicpu so resource id %lu", resource_id); + loaded = false; + GELOGD("LoadCustAicpuSo add aicpu so name %s, resource id %lu", so_name.c_str(), resource_id); + return SUCCESS; } + loaded = true; + GELOGD("LoadCustAicpuSo so name %s has been loaded.", so_name.c_str()); return SUCCESS; } Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { + GELOGD("Aicpu kernel launch task in, kernel name %s.", kernel_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); if (cust_aicpu_so_.size() == 0) return SUCCESS; // get current context diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 9821a4ab..c1faed82 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -286,7 +286,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); - ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name); + ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded); ge::Status LaunchCustAicpuSo(); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 3e3a715d..7b11c53e 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -875,7 +875,9 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } if (kernel_type_ == ccKernelType::CUST_AI_CPU) { - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed"); + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_, loaded), + "launch cust aicpu so failed"); } // copy args to new host memory diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc index e94fa425..a4d14fb0 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc @@ -25,7 +25,7 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { const void *args[] = {this->GetNavTablePtr(), reinterpret_cast(static_cast(this->GetNavTableSize()))}; - rtError_t rt_ret = rtMalloc(reinterpret_cast(device_args_addr_), sizeof(args), RT_MEMORY_HBM); + rtError_t rt_ret = rtMalloc(reinterpret_cast(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(reinterpret_cast(device_args_addr_), sizeof(args), (void *)args, sizeof(args), diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 573739bc..38407160 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -644,8 +644,12 @@ Status AicpuNodeTask::Init(const HybridModel &model) { const auto &context = kernel_def.context(); auto kernel_type = static_cast(context.kernel_type()); if (kernel_type == ccKernelType::CUST_AI_CPU) { - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name), "load cust aicpu so failed."); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name, loaded), + "load cust aicpu so failed."); + if (!loaded) { + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + } } GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 600c9c29..0b459e7a 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -62,8 +62,12 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { if (kernel_type == ccKernelType::CUST_AI_CPU) { task.is_custom_ = true; task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name, loaded), + "launch cust aicpu so failed"); + if (!loaded) { + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); + } } task.num_inputs_ = op_desc_->GetInputsSize();