diff --git a/.gitmodules b/.gitmodules
index a17fe30c..5c71bb16 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,8 +1,8 @@
 [submodule "parser"]
 	path = parser
 	url = https://gitee.com/ascend/parser.git
-	branch = development
+	branch = r1.2.0
 [submodule "metadef"]
 	path = metadef
 	url = https://gitee.com/ascend/metadef.git
-	branch = development
+	branch = r1.2.0
diff --git a/build.sh b/build.sh
index 71a733d1..3c9a537e 100644
--- a/build.sh
+++ b/build.sh
@@ -224,14 +224,12 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
 #  fi
 #
   if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
-    echo "Generating coverage statistics, please wait..."
-    cd ${BASEPATH}
-    rm -rf ${BASEPATH}/cov
-    mkdir ${BASEPATH}/cov
-    lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
-    lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
-    cd ${BASEPATH}/cov
-    genhtml coverage.info
+#    echo "Generating coverage statistics, please wait..."
+#    cd ${BASEPATH}
+#    rm -rf ${BASEPATH}/cov
+#    mkdir ${BASEPATH}/cov
+#    gcovr -r ./ --exclude 'third_party' --exclude 'build' --exclude 'tests' --exclude 'prebuild' --exclude 'inc' --print-summary --html --html-details -d -o cov/index.html
+#  fi
   fi
 
 # generate output package in tar form, including ut/st libraries/executables
diff --git a/cmake/FindModule.cmake b/cmake/FindModule.cmake
index 14737c71..eab39b10 100644
--- a/cmake/FindModule.cmake
+++ b/cmake/FindModule.cmake
@@ -21,7 +21,7 @@ function(find_module module name)
   if ("${${module}_LIBRARY_DIR}" STREQUAL "${module}_LIBRARY_DIR-NOTFOUND")
     message(FATAL_ERROR "${name} not found in ${path}")
   endif()
-
+
   add_library(${module} SHARED IMPORTED)
   set_target_properties(${module} PROPERTIES
                         IMPORTED_LOCATION ${${module}_LIBRARY_DIR}
diff --git a/cmake/external_libs/gflags.cmake b/cmake/external_libs/gflags.cmake
index 50cfb2bc..f3f0f0ef 100755
--- a/cmake/external_libs/gflags.cmake
+++ b/cmake/external_libs/gflags.cmake
@@ -23,7 +23,6 @@ ExternalProject_Add(gflags_build
                     URL ${REQ_URL}
                     #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
                     #SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2
-                    TLS_VERIFY OFF
                    CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags
                     BUILD_COMMAND $(MAKE)
                     INSTALL_COMMAND $(MAKE) install
diff --git a/cmake/external_libs/gtest.cmake b/cmake/external_libs/gtest.cmake
index 303ce464..96ea84b4 100755
--- a/cmake/external_libs/gtest.cmake
+++ b/cmake/external_libs/gtest.cmake
@@ -10,10 +10,7 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
     message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
 endif()
 
-if (GE_PB_PKG)
-    set(REQ_URL "${GE_PB_PKG}/libs/ge_gtest/release-1.8.0.tar.gz")
-    set(MD5 "")
-elseif (ENABLE_GITEE)
+if (ENABLE_GITEE)
     set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz")
     set(MD5 "")
 else()
@@ -25,9 +22,8 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-
 set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
 ExternalProject_Add(gtest_build
                     URL ${REQ_URL}
-                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest
-                    -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE
-                    -Dgtest_disable_pthreads=ON
+                    -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
                     BUILD_COMMAND $(MAKE)
                     INSTALL_COMMAND $(MAKE) install
                     EXCLUDE_FROM_ALL TRUE
diff --git a/cmake/external_libs/json.cmake b/cmake/external_libs/json.cmake
index 3c1cd012..c4a52843 100755
--- a/cmake/external_libs/json.cmake
+++ b/cmake/external_libs/json.cmake
@@ -5,24 +5,19 @@ endif()
 
 include(ExternalProject)
 
 set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include)
-if (GE_PB_PKG)
-    set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
-    set(MD5 "0dc903888211db3a0f170304cd9f3a89")
-    set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
-#elseif (ENABLE_GITEE)
+#if (ENABLE_GITEE)
 #    set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
 #    set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
-#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
-else()
-    set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
-    set(MD5 "0dc903888211db3a0f170304cd9f3a89")
-    set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
-endif ()
+#    set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
+#else()
+set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
+set(MD5 "0dc903888211db3a0f170304cd9f3a89")
+set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
+#endif ()
 ExternalProject_Add(json_build
                     URL ${REQ_URL}
                     #URL /home/txd/workspace/cloud_code/pkg/include.zip
                     SOURCE_DIR ${JSON_SRC_DIR}
-                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ""
                     BUILD_COMMAND ""
                     INSTALL_COMMAND ""
diff --git a/cmake/external_libs/onnx.cmake b/cmake/external_libs/onnx.cmake
index 1ee80d2d..9dadb544 100755
--- a/cmake/external_libs/onnx.cmake
+++ b/cmake/external_libs/onnx.cmake
@@ -6,10 +6,7 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx)
 set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto)
 file(MAKE_DIRECTORY ${ONNX_PROTO_DIR})
 
-if (GE_PB_PKG)
-    set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz")
-    set(MD5 "512f2779d6215d4a36f366b6b9acdf1e")
-elseif (ENABLE_GITEE)
+if (ENABLE_GITEE)
     set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz")
     set(MD5 "1bdbcecdd68ea8392630467646776e02")
 else()
@@ -22,7 +19,6 @@ ExternalProject_Add(onnx
                     #URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz
                     #URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345
                     #SOURCE_DIR ${ONNX_SRC_DIR}
-                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ""
                     BUILD_COMMAND ""
                     #INSTALL_COMMAND ""
diff --git a/cmake/external_libs/protobuf_shared.cmake b/cmake/external_libs/protobuf_shared.cmake
index 6334c8a3..c9c6b7d9 100755
--- a/cmake/external_libs/protobuf_shared.cmake
+++ b/cmake/external_libs/protobuf_shared.cmake
@@ -26,7 +26,6 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst
 set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
 ExternalProject_Add(protobuf_build
                     URL ${REQ_URL}
-                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ${CMAKE_COMMAND}
                     -Dprotobuf_WITH_ZLIB=OFF
                     -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}
diff --git a/cmake/external_libs/protobuf_static.cmake b/cmake/external_libs/protobuf_static.cmake
index e4bbb9a0..6f3e1f53 100755
--- a/cmake/external_libs/protobuf_static.cmake
+++ b/cmake/external_libs/protobuf_static.cmake
@@ -27,7 +27,6 @@ ExternalProject_Add(protobuf_static_build
                     URL ${REQ_URL}
                     #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
                     #SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0
-                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ${CMAKE_COMMAND}
                     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
diff --git a/cmake/external_libs/protoc.cmake b/cmake/external_libs/protoc.cmake
index 58321f04..e9adf780 100755
--- a/cmake/external_libs/protoc.cmake
+++ b/cmake/external_libs/protoc.cmake
@@ -30,7 +30,6 @@ ExternalProject_Add(protoc_build
                     URL ${REQ_URL}
                     #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
                     #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
-                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc /cmake
                     BUILD_COMMAND $(MAKE)
                     INSTALL_COMMAND $(MAKE) install
diff --git a/cmake/external_libs/securec.cmake b/cmake/external_libs/securec.cmake
index 0f8b6d3a..0bd62ab2 100755
--- a/cmake/external_libs/securec.cmake
+++ b/cmake/external_libs/securec.cmake
@@ -10,20 +10,11 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
     message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
 endif()
 
-if (GE_PB_PKG)
-    set(REQ_URL "${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz")
-    set(MD5 "")
-else()
-    set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz")
-    set(MD5 "")
-endif ()
-
 ExternalProject_Add(c_sec_build
-                    URL ${REQ_URL}
-                    #URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
+                    URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
+                    #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
                     #SOURCE_DIR ${GE_CODE_DIR}/../libc_sec
                     PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch
-                    TLS_VERIFY OFF
                     CONFIGURE_COMMAND ${CMAKE_COMMAND}
                     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                     -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 3da80492..3a0f7638 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -157,8 +157,6 @@ set(TRAIN_SRC_LIST
     "graph/passes/compile_nodes_pass.cc"
     "graph/passes/constant_folding_pass.cc"
     "graph/passes/constant_fuse_same_pass.cc"
-    "graph/passes/remove_same_const_pass.cc"
-    "graph/passes/useless_control_out_remove_pass.cc"
     "graph/passes/control_trigger_pass.cc"
     "graph/passes/dimension_adjust_pass.cc"
     "graph/passes/dimension_compute_pass.cc"
@@ -524,8 +522,6 @@ set(INFER_SRC_LIST
     "graph/passes/assign_pass.cc"
     "graph/passes/addn_pass.cc"
     "graph/passes/common_subexpression_elimination_pass.cc"
-    "graph/passes/remove_same_const_pass.cc"
-    "graph/passes/useless_control_out_remove_pass.cc"
     "graph/passes/transop_symmetry_elimination_pass.cc"
     "graph/passes/save_pass.cc"
     "graph/passes/switch_dead_branch_elimination.cc"
diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc
index 0b9e9dcc..e92ada05 100755
--- a/ge/common/dump/dump_op.cc
+++ b/ge/common/dump/dump_op.cc
@@ -94,9 +94,6 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
     for (auto dim : output_descs.at(i).GetShape().GetDims()) {
       output.mutable_shape()->add_dim(dim);
     }
-    for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) {
-      output.mutable_origin_shape()->add_dim(dim);
-    }
     int64_t output_size = 0;
     if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
       GELOGE(PARAM_INVALID, "Get output size filed");
@@ -121,9 +118,6 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
     for (auto dim : input_descs.at(i).GetShape().GetDims()) {
       input.mutable_shape()->add_dim(dim);
     }
-    for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) {
-      input.mutable_origin_shape()->add_dim(dim);
-    }
     int64_t input_size = 0;
     if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
       GELOGE(PARAM_INVALID, "Get output size filed");
@@ -220,15 +214,8 @@ Status DumpOp::LaunchDumpOp() {
   SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
   GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
          dump_path.c_str());
-  uint32_t task_id = 0;
-  uint32_t stream_id = 0;
-  rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
-  if (rt_ret != RT_ERROR_NONE) {
-    GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret);
-  }
+
   aicpu::dump::Task task;
-  task.set_task_id(task_id);
-  task.set_stream_id(stream_id);
   task.mutable_op()->set_op_name(op_desc_->GetName());
   task.mutable_op()->set_op_type(op_desc_->GetType());
   if (dump_properties_.GetDumpMode() == kDumpOutput) {
diff --git a/ge/common/ge/tbe_plugin_manager.cc b/ge/common/ge/tbe_plugin_manager.cc
index 0cc7d553..e3d2282a 100755
--- a/ge/common/ge/tbe_plugin_manager.cc
+++ b/ge/common/ge/tbe_plugin_manager.cc
@@ -184,7 +184,7 @@ void TBEPluginManager::LoadCustomOpLib() {
   std::string fmk_type = std::to_string(domi::TENSORFLOW);
   auto it = options_.find(ge::FRAMEWORK_TYPE);
   if (it != options_.end()) {
-    fmk_type = it->second;
+    fmk_type = it->second;
   }
   std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
   GELOGI("The size of registration_datas is: %zu", registration_datas.size());
@@ -192,7 +192,7 @@ void TBEPluginManager::LoadCustomOpLib() {
     if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) {
       GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
              TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
-      (void)domi::OpRegistry::Instance()->Register(reg_data);
+      domi::OpRegistry::Instance()->Register(reg_data);
     }
   }
 }
diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc
index 43ed6434..d6740d8b 100644
--- a/ge/common/profiling/ge_profiling.cc
+++ b/ge/common/profiling/ge_profiling.cc
@@ -182,7 +182,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
     command.module_index = prof_config_param->profSwitch;
   }
   GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(),
-         command.module_index);
+          command.module_index);
   if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
     GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
   }
diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index 73d0cc3c..86ae7dca 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -38,8 +38,10 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
 }  // namespace
 
 namespace ge {
-ProfilingManager::ProfilingManager()
-    : is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), subscribe_count_(0) {
+ProfilingManager::ProfilingManager() : is_load_profiling_(false),
+                                       is_execute_profiling_(false),
+                                       is_training_trace_(false),
+                                       subscribe_count_(0) {
   prof_cb_.msprofCtrlCallback = nullptr;
   prof_cb_.msprofReporterCallback = nullptr;
 }
@@ -100,8 +102,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
       return INTERNAL_ERROR;
     }
     is_execute_profiling_ = true;
-    GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options,
-           options.profiling_options.c_str());
+    GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(),
+           prof_conf.options, options.profiling_options.c_str());
   } else {
     (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH);
     (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX);
@@ -141,9 +143,6 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
   }
   try {
     Json prof_options = Json::parse(options);
-    if (options.find(kTrainingTrace) == std::string::npos) {
-      return ge::SUCCESS;
-    }
     const std::string training_trace = prof_options[kTrainingTrace];
     if (training_trace.empty()) {
       GELOGI("Training trace will not take effect.");
@@ -212,16 +211,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
   uint32_t block_dim = task.block_dim;
   uint32_t task_id = task.task_id;
   uint32_t stream_id = task.stream_id;
-  std::string shape_type = task.shape_type;
-  int64_t cur_iter_num = task.cur_iter_num;
   data = model_name.append(" ")
              .append(op_name).append(" ")
-             .append(std::to_string(block_dim)).append(" ")
+             .append(std::to_string(block_dim).append(" ")
              .append(std::to_string(task_id)).append(" ")
              .append(std::to_string(stream_id)).append(" ")
-             .append(std::to_string(model_id)).append(" ")
-             .append(shape_type).append(" ")
-             .append(std::to_string(cur_iter_num)).append("\n");
+             .append(std::to_string(model_id)).append("\n"));
 
   ReporterData reporter_data{};
   reporter_data.deviceId = device_id;
@@ -846,7 +841,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP
       return;
     }
   }
-  return;
 }
diff --git a/ge/common/proto/op_mapping_info.proto b/ge/common/proto/op_mapping_info.proto
index 7fb6f84b..e23b7ebe 100644
--- a/ge/common/proto/op_mapping_info.proto
+++ b/ge/common/proto/op_mapping_info.proto
@@ -15,7 +15,6 @@ message Output {
   int32 original_output_data_type = 7;
   int32 original_output_format = 8;
   uint64 size = 9;
-  Shape origin_shape = 10;
 }
 
 message Input {
@@ -24,7 +23,6 @@ message Input {
   Shape shape = 3;
   uint64 address = 4;
   uint64 size = 5;
-  Shape origin_shape = 6;
 }
 
 enum BufferType {
diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc
index 18d78696..6bf55fed 100755
--- a/ge/executor/ge_executor.cc
+++ b/ge/executor/ge_executor.cc
@@ -209,33 +209,19 @@ bool IsDynmaicDimsSizeMatchModel(const vector cur_dynamic_dims,
 
 namespace ge {
 bool GeExecutor::isInit_ = false;
-
-static void InitOpsProtoManger() {
-  string opsproto_path;
-  const char *path_env = std::getenv("ASCEND_OPP_PATH");
-  if (path_env != nullptr) {
-    string path = path_env;
-    string file_path = RealPath(path.c_str());
-    if (file_path.empty()) {
-      GELOGE(FAILED, "File path %s is invalid.", path.c_str());
-      return;
+class ModelListenerAdapter : public ModelListener {
+ public:
+  domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode,
+                             std::vector<ge::OutputTensorInfo> &outputs) {
+    if (listener == nullptr) {
+      GELOGE(ge::FAILED, "listener is null.");
+      return FAILED;
     }
-    opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
-    GELOGI("Get opsproto so path from env : %s", path.c_str());
-  } else {
-    string path_base = PluginManager::GetPath();
-    GELOGI("path_base is %s", path_base.c_str());
-    path_base = path_base.substr(0, path_base.rfind('/'));
-    path_base = path_base.substr(0, path_base.rfind('/') + 1);
-    opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
-  }
-
-  GELOGI("Get opsproto path is %s", opsproto_path.c_str());
-  OpsProtoManager *manager = OpsProtoManager::Instance();
-  map<string, string> option_tmp;
-  option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
-  (void)manager->Initialize(option_tmp);
-}
+    return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs);
+  }
+
+  std::shared_ptr<ge::ModelListener> listener;
+};
 
 GeExecutor::GeExecutor() {}
 
@@ -246,16 +232,6 @@ Status GeExecutor::Initialize() {
     return ge::SUCCESS;
   }
 
-  OpTilingManager::GetInstance().LoadSo();
-
-  Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
-  if (initHostCpuEngineStatus != SUCCESS) {
-    GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine");
-    return initHostCpuEngineStatus;
-  }
-
-  InitOpsProtoManger();
-
   std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
   mem_type.push_back(RT_MEMORY_P2P_DDR);
   auto ret = MemManager::Instance().Initialize(mem_type);
@@ -560,6 +536,60 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add
   return SUCCESS;
 }
 
+// Load model
+Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key,
+                                    int32_t priority, std::shared_ptr<ge::ModelListener> listener) {
+  GELOGI("load model offline begin.");
+  if (!isInit_) {
+    GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
+    return ACL_ERROR_GE_EXEC_NOT_INIT;
+  }
+
+  string filePath = RealPath(path.c_str());
+  if (filePath.empty()) {
+    GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID,
+           "File path is invalid. please check your text file '%s'.", path.c_str());
+    return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
+  }
+
+  std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
+  if (listener_adapter == nullptr) {
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
+    return ACL_ERROR_GE_MEMORY_ALLOCATION;
+  }
+  listener_adapter->listener = listener;
+
+  Status ret = GraphLoader::LoadModelFromFile(path, key, priority, listener_adapter, model_id);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "[GeExecutor] LoadModelFromFile failed");
+    return ACL_ERROR_GE_LOAD_MODEL;
+  }
+  return SUCCESS;
+}
+
+Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
+                             std::shared_ptr<ge::ModelListener> listener) {
+  GELOGI("Load model begin.");
+  if (!isInit_) {
+    GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
+    return ACL_ERROR_GE_EXEC_NOT_INIT;
+  }
+
+  std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
+  if (listener_adapter == nullptr) {
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
+    return ACL_ERROR_GE_MEMORY_ALLOCATION;
+  }
+  listener_adapter->listener = listener;
+
+  Status ret = GraphLoader::LoadModel(model_data, listener_adapter, model_id);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "[GeExecutor] LoadModel failed.");
+    return ACL_ERROR_GE_LOAD_MODEL;
+  }
+  return ret;
+}
+
 Status GeExecutor::UnloadModel(uint32_t model_id) {
   GELOGD("unload model %u begin.", model_id);
   if (!isInit_) {
@@ -592,6 +622,21 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
   return SUCCESS;
 }
 
+Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) {
+  GELOGI("run model begin.");
+  if (!isInit_) {
+    GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
+    return ACL_ERROR_GE_EXEC_NOT_INIT;
+  }
+
+  InputData inputs;
+  GetDomiInputData(input_data, inputs);
+  OutputData outputs;
+  GetDomiOutputData(output_data, outputs);
+
+  return GraphExecutor::DataInput(inputs, outputs);
+}
+
 // Get input and output descriptor
 Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<TensorDesc> &input_desc,
                                     std::vector<TensorDesc> &output_desc, bool new_model_desc) {
diff --git a/ge/executor/proto/op_mapping_info.proto b/ge/executor/proto/op_mapping_info.proto
index 7fb6f84b..e23b7ebe 100644
--- a/ge/executor/proto/op_mapping_info.proto
+++ b/ge/executor/proto/op_mapping_info.proto
@@ -15,7 +15,6 @@ message Output {
   int32 original_output_data_type = 7;
   int32 original_output_format = 8;
   uint64 size = 9;
-  Shape origin_shape = 10;
 }
 
 message Input {
@@ -24,7 +23,6 @@ message Input {
   Shape shape = 3;
   uint64 address = 4;
   uint64 size = 5;
-  Shape origin_shape = 6;
 }
 
 enum BufferType {
diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk
index e20456d5..bfb612ea 100755
--- a/ge/ge_inference.mk
+++ b/ge/ge_inference.mk
@@ -191,8 +191,6 @@ OMG_HOST_SRC_FILES := \
     graph/passes/control_trigger_pass.cc \
     graph/passes/cond_pass.cc \
     graph/passes/cond_remove_pass.cc \
-    graph/passes/remove_same_const_pass.cc \
-    graph/passes/useless_control_out_remove_pass.cc \
     graph/passes/for_pass.cc \
     graph/passes/enter_pass.cc \
     graph/passes/assign_pass.cc \
diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc
index e17f73de..c836d4d6 100755
--- a/ge/ge_local_engine/engine/host_cpu_engine.cc
+++ b/ge/ge_local_engine/engine/host_cpu_engine.cc
@@ -39,7 +39,7 @@ namespace {
     }                                                                                                              \
     ge_tensor = MakeShared<GeTensor>(out_desc);                                                                    \
     GE_CHECK_NOTNULL(ge_tensor);                                                                                   \
-    GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
+    GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
    if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) {    \
       GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str());        \
       return MEMALLOC_FAILED;                                                                                      \
@@ -50,7 +50,8 @@ namespace {
     } else {                                                                                                       \
       ge_tensor = outputs[i];                                                                                      \
       GE_CHECK_NOTNULL(ge_tensor);                                                                                 \
-      GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i);                                         \
+      GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i,                      \
+             reinterpret_cast<const uint8_t *>(ge_tensor->GetData().data()), ge_tensor->GetData().size());         \
     }                                                                                                              \
     auto tensor = TensorAdapter::AsTensor(*ge_tensor);                                                             \
     auto tensor_name = op_desc->GetOutputNameByIndex(i);                                                           \
diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk
index 9706dadb..25718e9b 100644
--- a/ge/ge_runner.mk
+++ b/ge/ge_runner.mk
@@ -126,8 +126,6 @@ LIBGE_LOCAL_SRC_FILES := \
     graph/passes/compile_nodes_pass.cc \
     graph/passes/constant_folding_pass.cc \
     graph/passes/constant_fuse_same_pass.cc \
-    graph/passes/remove_same_const_pass.cc \
-    graph/passes/useless_control_out_remove_pass.cc \
     graph/passes/control_trigger_pass.cc \
     graph/passes/dimension_adjust_pass.cc \
     graph/passes/dimension_compute_pass.cc \
diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc
index c775c477..383f2b31 100644
--- a/ge/generator/ge_generator.cc
+++ b/ge/generator/ge_generator.cc
@@ -272,7 +272,6 @@ static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector
 
   std::vector<int64_t> dynamic_shape_dims = {kDynamicDimValue};
   GeShape dynamic_shape(dynamic_shape_dims);
-  std::vector<std::pair<int64_t, int64_t>> dynamic_shape_range;
 
   ge::GeTensor inputTensor;
   ge::GeTensorDesc desc(input_desc);
@@ -281,7 +280,6 @@ static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector
   (void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const);
   if (!is_const && shape_ori.GetDims().size() > 0) {
     desc.SetShape(dynamic_shape);
-    desc.SetShapeRange(dynamic_shape_range);
   }
 
   inputTensor.SetTensorDesc(desc);
@@ -530,6 +528,24 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) {
   return true;
 }
 
+static Status SetModelNameForDump(GeRootModelPtr ge_root_model) {
+  ModelHelper model_helper;
+  string model_name = "";
+  GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
+  Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
+                                                                 model_name);
+  if (name_ret != SUCCESS) {
+    ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
+    GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
+    return PARAM_INVALID;
+  }
+  map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
+  GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
+  GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
+  ge_model->SetName(model_name);
+  return SUCCESS;
+}
+
 Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
                                   ModelBufferData &model, bool is_offline) {
   rtContext_t ctx = nullptr;
@@ -538,7 +554,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
     GELOGD("Current ctx is null.");
     ctx = nullptr;
   }
-
   GeRootModelPtr ge_root_model = nullptr;
   GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
   impl_->is_offline_ = is_offline;
@@ -562,22 +577,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
            impl_->build_step_.c_str());
     return SUCCESS;
   }
-  GE_CHECK_NOTNULL(ge_root_model);
-  GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
-  ModelHelper model_helper;
-  string model_name = "";
-  Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
-                                                                 model_name);
-  if (name_ret != SUCCESS) {
-    ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
-    GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
-    return PARAM_INVALID;
+  ret = SetModelNameForDump(ge_root_model);
+  if (ret != SUCCESS) {
+    return ret;
   }
-  map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
-  GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
-  GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
-  ge_model->SetName(model_name);
   ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model);
   if (ret != SUCCESS) {
     GELOGE(ret, "Save model failed");
@@ -586,11 +590,9 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
     }
     return ret;
   }
-
   if (ctx != nullptr) {
     (void)rtCtxSetCurrent(ctx);
   }
-
   return SUCCESS;
 }
diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc
index 40b3b9dc..16d5d38f 100755
--- a/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/ge/graph/build/memory/graph_mem_assigner.cc
@@ -99,7 +99,7 @@ Status GraphMemoryAssigner::AssignMemory() {
   MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
   memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
 
-  if (mem_assigner->GetP2PMemOffset() >= 0) {
+  if (mem_assigner->GetP2PMemOffset() > 0) {
     MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
     memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
   }
diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc
index 3be45895..37eb499a 100755
--- a/ge/graph/build/model_builder.cc
+++ b/ge/graph/build/model_builder.cc
@@ -224,7 +224,6 @@ Status ModelBuilder::AdjustConstWeightSize(const ge::NodePtr &node, size_t &mem_
     GeTensorDesc &tensor_desc = weight->MutableTensorDesc();
     size_t output_size = weight->GetData().size();
     TensorUtils::SetDataOffset(tensor_desc, mem_offset);
-    GELOGD("Node: %s, weight size: %zu.", node->GetName().c_str(), output_size);
     mem_offset += output_size;
   }
   return SUCCESS;
diff --git a/ge/graph/build/stream_graph_optimizer.cc b/ge/graph/build/stream_graph_optimizer.cc
index 05049818..ea0a7698 100644
--- a/ge/graph/build/stream_graph_optimizer.cc
+++ b/ge/graph/build/stream_graph_optimizer.cc
@@ -66,13 +66,13 @@ bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &com
     if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
       label_set.insert(batch_label);
     } else {
-      GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(),
+      GELOGD("Node %s[%s] has no batch_label, subgraph %s, stream id: %ld ", cur_node->GetName().c_str(),
              cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id);
       continue;
     }
 
-    GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(),
-           comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
+    GELOGD("Node %s in subgraph %s stream id: %ld, batch_label: %s, node num: %zu", cur_node->GetName().c_str(),
+           comp_graph->GetName().c_str(), stream_id, batch_label.c_str(), comp_graph->GetDirectNodesSize());
   }
   if (stream_set.size() > 1 || label_set.size() > 1) {
     GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.",
@@ -126,12 +126,14 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
                run_context.graphStreamList.size());
         return FAILED;
       }
+
       run_context.stream = run_context.graphStreamList[stream_id];
-      std::string batch_label;
-      (void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
+      std::string batch_label;
+      (void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
       GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, "
-             "batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
+             "batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
              static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)), batch_label.c_str());
+
       for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) {
         GE_CHECK_NOTNULL(*iter);
         Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context);
diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc
index cb68533e..44556422 100755
--- a/ge/graph/load/graph_loader.cc
+++ b/ge/graph/load/graph_loader.cc
@@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
                                      ModelData &model_data) {
   Status ret;
   if (!CheckInputPathValid(path)) {
-    GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
-    return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
+    GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
+    return GE_EXEC_MODEL_PATH_INVALID;
   }
 
   GELOGI("Load model begin, model path is: %s", path.c_str());
   if (!key_path.empty() && !CheckInputPathValid(key_path)) {
-    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
-    return ACL_ERROR_GE_PARAM_INVALID;
+    GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
+    return GE_EXEC_MODEL_KEY_PATH_INVALID;
   }
 
   ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
@@ -144,6 +144,63 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
   return SUCCESS;
 }
 
+Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
+                                      const std::shared_ptr<ModelListener> &listener, uint32_t &model_id) {
+  Status ret;
+  ModelData model_data;
+  ret = LoadDataFromFile(path, key_path, priority, model_data);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret);
+    if (model_data.model_data != nullptr) {
+      delete[] static_cast<char *>(model_data.model_data);
+      model_data.model_data = nullptr;
+    }
+    return ret;
+  }
+
+  ret = LoadModel(model_data, listener, model_id);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "LoadModel: Load failed. ret = %u", ret);
+    if (model_data.model_data != nullptr) {
+      delete[] static_cast<char *>(model_data.model_data);
+      model_data.model_data = nullptr;
+    }
+  }
+
+  if (model_data.model_data != nullptr) {
+    delete[] static_cast<char *>(model_data.model_data);
+    model_data.model_data = nullptr;
+  }
+
+  return ret;
+}
+
+Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
+                              uint32_t &model_id) {
+  GELOGI("Load model begin, model_id:%u.", model_id);
+
+  // For GeOp, Open Device 0 here.
+  GE_CHK_RT_RET(rtSetDevice(0));
+  auto model_manager = ModelManager::GetInstance();
+  GE_CHECK_NOTNULL(model_manager);
+  Status ret = model_manager->LoadModelOffline(model_id, model_data, listener);
+  if (ret != SUCCESS) {
+    GE_CHK_RT(rtDeviceReset(0));
+    GELOGE(ret, "LoadModel: Load failed.");
+    return ret;
+  }
+  ret = model_manager->Start(model_id);
+  if (ret != SUCCESS) {
+    if (model_manager->Unload(model_id) != SUCCESS) {
+      GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start.");
+    }
+    GELOGE(ret, "LoadModel: Start failed.");
+    return ret;
+  }
+  GELOGI("LoadModel: Start model success, model_id:%u.", model_id);
+  return SUCCESS;
+}
+
 Status GraphLoader::CommandHandle(const Command &command) {
   try {
     auto model_manager = ModelManager::GetInstance();
@@ -168,16 +225,16 @@ Status GraphLoader::CommandHandle(const Command &command) {
 }
 
 Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr,
-                                      size_t mem_size, void *weight_ptr, size_t weight_size) {
+                                      size_t memsize, void *weight_ptr, size_t weightsize) {
   GELOGI("Load model begin, model_id:%u.", model_id);
 
   // For ACL, Open Device from App.
   auto model_manager = ModelManager::GetInstance();
   GE_CHECK_NOTNULL(model_manager);
   Status ret = model_manager->LoadModelOffline(
-      model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size);
+      model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize);
   if (ret != SUCCESS) {
-    GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id);
-    return ACL_ERROR_GE_LOAD_MODEL;
+    GELOGE(ret, "Load model failed, model_id:%u.", model_id);
+    return ret;
   }
   GELOGI("Load model success, model_id:%u.", model_id);
   return SUCCESS;
@@ -202,8 +259,8 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
   GE_CHECK_NOTNULL(model_manager);
   Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids);
   if (ret != SUCCESS) {
-    GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model with queue failed, model_id:%u.", model_id);
-    return ACL_ERROR_GE_LOAD_MODEL;
+    GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id);
+    return ret;
   }
 
   GELOGI("Load model with queue success, model_id:%u.", model_id);
diff --git a/ge/graph/load/graph_loader.h b/ge/graph/load/graph_loader.h
index 3a13a113..974af5c1 100755
--- a/ge/graph/load/graph_loader.h
+++ b/ge/graph/load/graph_loader.h
@@ -44,6 +44,12 @@ class GraphLoader {
 
   static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size);
 
+  static Status LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
+                          uint32_t &model_id);
+
+  static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
+                                  const std::shared_ptr<ModelListener> &listener, uint32_t &model_id);
+
   static Status CommandHandle(const Command &command);
 
   static Status GetMemoryInfo(int64_t &free);
diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc
index 6f65e907..b331d780 100644
--- a/ge/graph/load/new_model_manager/data_dumper.cc
+++ b/ge/graph/load/new_model_manager/data_dumper.cc
@@ -319,9 +319,6 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis
   for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
     output.mutable_shape()->add_dim(dim);
   }
-  for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
-    output.mutable_origin_shape()->add_dim(dim);
-  }
   int64_t output_size = 0;
   if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
     GELOGE(PARAM_INVALID, "Get output size filed");
@@ -479,9 +476,6 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor
   for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
     input.mutable_shape()->add_dim(dim);
  }
-  for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
-    input.mutable_origin_shape()->add_dim(dim);
-  }
   int64_t input_size = 0;
   if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
     GELOGI("Get aipp input size according to attr is %ld", input_size);
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index 6c873fc2..bc755e07 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -289,8 +289,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh
   if (weight_ptr == nullptr) {
     weights_mem_base_ = MallocWeightsMem(weights_size);
     if (weights_mem_base_ == nullptr) {
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc weight memory failed. size: %zu", weights_size);
-      return ACL_ERROR_GE_MEMORY_ALLOCATION;
+      GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
+      return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
     }
     is_inner_weight_base_ = true;
   }
@@ -307,8 +307,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh
 
 Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
   if (is_feature_map_mem_has_inited_) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "call InitFeatureMapMem more than once .");
-    return ACL_ERROR_GE_MEMORY_ALLOCATION;
+    GELOGE(FAILED, "call InitFeatureMapMem more than once .");
+    return FAILED;
   }
   is_feature_map_mem_has_inited_ = true;
 
@@ -316,8 +316,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
   std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;
 
   if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
-    return ACL_ERROR_GE_MEMORY_ALLOCATION;
+    GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
+    return FAILED;
   }
 
   mem_base_ = static_cast<uint8_t *>(dev_ptr);
@@ -327,8 +327,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
   if (TotalMemSize() && mem_base_ == nullptr) {
     mem_base_ = MallocFeatureMapMem(data_size);
     if (mem_base_ == nullptr) {
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size);
-      return ACL_ERROR_GE_MEMORY_ALLOCATION;
+      GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size);
+      return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED;
     }
     GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]",
            runtime_param_.graph_id, mem_base_, data_size);
@@ -343,8 +343,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
   if (p2p_data_size != 0) {
     p2p_mem_base_ = MallocP2PMem(p2p_data_size);
     if (p2p_mem_base_ == nullptr) {
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc p2p memory failed,size: %zu", p2p_data_size);
-      return ACL_ERROR_GE_MEMORY_ALLOCATION;
+      GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size);
+      return GE_EXEC_ALLOC_P2P_MEM_FAILED;
     }
     GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]",
            runtime_param_.graph_id, p2p_mem_base_, p2p_data_size);
@@ -710,7 +710,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
   }
 
   // collect profiling for ge
-  GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed");
   auto &profiling_manager = ProfilingManager::Instance();
   if (profiling_manager.ProfilingModelLoadOn()) {
     Status p_ret = ReportProfilingData();
@@ -971,7 +970,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
   uint32_t parent_index = 0;  // Ignore subgraph Data Node.
   if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
     GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str());
-    return SUCCESS;
+    return InitInputBatchLabel(node);
   }
 
   data_op_list_.push_back(op_desc);
@@ -1012,6 +1011,10 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
   }
 
   data_op_index++;
+  if (InitInputZeroCopyNodes(node) != SUCCESS) {
+    GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!");
+    return PARAM_INVALID;
+  }
   return SUCCESS;
 }
 
@@ -1033,6 +1036,39 @@ void DavinciModel::AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_inde
   }
 }
 
+///
+/// @ingroup ge
+/// @brief input zero copy node Initialize.
+/// @param [in] NodePtr: Data Op.
+/// @return Status
+///
+Status DavinciModel::InitInputZeroCopyNodes(const NodePtr &node) {
+  auto out_data_anchor = node->GetOutDataAnchor(kDataIndex);
+  if (out_data_anchor == nullptr) {
+    GELOGE(FAILED, "Out data anchor is nullptr");
+    return FAILED;
+  }
+  for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
+    auto node = peer_in_data_anchor->GetOwnerNode();
+    auto op_desc = node->GetOpDesc();
+    if (op_desc == nullptr) {
+      GELOGE(FAILED, "Op desc is nullptr");
+      return FAILED;
+    }
+    string batch_label;
+    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
+    if (batch_label.empty()) {
+      batch_label = kDefaultBatchLable;
+    }
+    if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) {
+      zero_copy_op_id_batch_label_.emplace(pair<int64_t, string>(op_desc->GetId(), batch_label));
+      GELOGD("Init input zero copy nodes success, op name:%s, op id: %ld, batch label: %s.", op_desc->GetName().c_str(),
+             op_desc->GetId(), batch_label.c_str());
+    }
+  }
+  return SUCCESS;
+}
+
 bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
   bool getnext_sink_dynamic = false;
   if (ge::AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) {
@@ -1058,7 +1094,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
   if (owner_graph->GetParentGraph() != nullptr) {
     GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str());
     op_list_.erase(op_desc->GetId());
-    return SUCCESS;
+    return InitOutputBatchLabel(node);
   }
 
   output_op_list_.push_back(op_desc);
@@ -1110,6 +1146,8 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
     }
   }
 
+  GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS,
+                  GELOGE(PARAM_INVALID, "Output zero copy nodes init failed!"); return PARAM_INVALID;);
   GetAllGearsInfo(node);
   if (is_getnext_sink_dynamic_) {
     GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS,
@@ -1305,6 +1343,121 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info
   }
 }
 
+///
+/// @ingroup ge
+/// @brief output zero copy node Initialize.
+/// @param [in] NodePtr: netoutput Op.
+/// @return Status
+///
+Status DavinciModel::InitOutputZeroCopyNodes(const NodePtr &node) {
+  set<NodePtr> nodes_need_record;
+  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
+    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
+    if (peer_out_data_anchor == nullptr) {
+      continue;
+    }
+    auto peer_node = peer_out_data_anchor->GetOwnerNode();
+    nodes_need_record.emplace(peer_node);
+
+    // Merge node output multiplexed input, upstream nodes need to be considered in multiple batch scenarios
+    if (peer_node->GetType() == MERGE) {
+      for (const auto &merge_peer_in_data_anchor : peer_node->GetAllInDataAnchors()) {
+        auto merge_peer_out_data_anchor = merge_peer_in_data_anchor->GetPeerOutAnchor();
+        if (merge_peer_out_data_anchor == nullptr) {
+          continue;
+        }
+        auto merge_peer_node = merge_peer_out_data_anchor->GetOwnerNode();
+        nodes_need_record.emplace(merge_peer_node);
+      }
+    } else {
+      for (const auto &other_in_data_anchor : peer_out_data_anchor->GetPeerInDataAnchors()) {
+        auto other_in_node = other_in_data_anchor->GetOwnerNode();
+        if (other_in_node->GetType() != NETOUTPUT) {
+          nodes_need_record.emplace(other_in_node);
+        }
+      }
+    }
+  }
+
+  for (const auto &node_need_record : nodes_need_record) {
+    auto op_desc = node_need_record->GetOpDesc();
+    GE_CHECK_NOTNULL(op_desc);
+    string batch_label;
+    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
+    if (batch_label.empty()) {
+      batch_label = kDefaultBatchLable;
+    }
+    if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) {
+      zero_copy_op_id_batch_label_.emplace(pair<int64_t, string>(op_desc->GetId(), batch_label));
+      GELOGD("Init Output zero copy nodes success, op name:%s, op id: %ld, batch label: %s.",
+             op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str());
+    }
+  }
+  return SUCCESS;
+}
+
+///
+/// @ingroup ge
+/// @brief input zero copy node Initialize.
+/// @param [in] NodePtr: Data Op.
+/// @return Status
+///
+Status DavinciModel::InitInputBatchLabel(const NodePtr &node) {
+  string batch_label;
+  if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
+    return SUCCESS;  // Not Multi-batch.
+  }
+
+  const auto &out_data_anchor = node->GetOutDataAnchor(kDataIndex);
+  GE_CHECK_NOTNULL(out_data_anchor);
+
+  for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
+    const auto &node = peer_in_data_anchor->GetOwnerNode();
+    const auto &op_desc = node->GetOpDesc();
+    GE_CHECK_NOTNULL(op_desc);
+
+    if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) {
+      zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label;
+      GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s", op_desc->GetName().c_str(),
+             op_desc->GetId(), batch_label.c_str());
+    }
+  }
+
+  return SUCCESS;
+}
+
+///
+/// @ingroup ge
+/// @brief output zero copy node Initialize for Case.
+/// @param [in] NodePtr: netoutput Op.
+/// @return Status
+///
+Status DavinciModel::InitOutputBatchLabel(const NodePtr &node) {
+  string batch_label;
+  if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
+    return SUCCESS;  // Not Multi-batch.
+ } + + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + const auto &peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + if (peer_out_data_anchor == nullptr) { + continue; + } + + const auto &peer_node = peer_out_data_anchor->GetOwnerNode(); + const auto &op_desc = peer_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + + if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { + zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; + GELOGD("Init Output zero copy nodes success, op name: %s, op id: %ld, batch label: %s", + op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str()); + } + } + + return SUCCESS; +} + /// @ingroup ge /// @brief LabelSet Op Initialize. /// @param [in] op_desc: LabelSet Op descriptor. @@ -2087,61 +2240,12 @@ Status DavinciModel::SyncVarData() { return ret; } -Status DavinciModel::InitModelProfile() { - for (const auto &task : task_list_) { - GE_CHECK_NOTNULL(task); - const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo(); - // when type is RT_MODEL_TASK_KERNEL, ctx is not null - if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { - continue; - } - - GELOGI("task.id = %u, opNum = %zu", task->GetTaskID(), fusion_op_info->original_op_names.size()); - op_id_map_.insert(std::make_pair(fusion_op_info->op_index, task->GetTaskID())); - } - - std::set task_id_set; - using CIT = std::multimap::const_iterator; - using Range = std::pair; - for (const auto &task : task_list_) { - GE_CHECK_NOTNULL(task); - const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo(); - if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { - continue; - } - - if (task_id_set.count(task->GetTaskID()) > 0) { - continue; - } - - const auto &op_desc = GetOpByIndex(fusion_op_info->op_index); - GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index: %u out of range", fusion_op_info->op_index); - - ProfileInfo profile; - profile.fusion_info = *fusion_op_info; - Range range = op_id_map_.equal_range(fusion_op_info->op_index); - for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { - profile.task_count++; - task_id_set.insert(range_idx->second); - } - - // memory info - TaskMemInfo &mem_info = profile.memory_info; - const auto input_size = ModelUtils::GetInputSize(op_desc); - const auto output_size = ModelUtils::GetOutputSize(op_desc); - const auto workspace_size = ModelUtils::GetWorkspaceSize(op_desc); - const auto weight_size = ModelUtils::GetWeightSize(op_desc); - mem_info.input_size = std::accumulate(input_size.begin(), input_size.end(), 0); - mem_info.output_size = std::accumulate(output_size.begin(), output_size.end(), 0); - mem_info.workspace_size = std::accumulate(workspace_size.begin(), workspace_size.end(), 0); - mem_info.weight_size = std::accumulate(weight_size.begin(), weight_size.end(), 0); - mem_info.total_size = mem_info.weight_size + mem_info.input_size + mem_info.output_size + mem_info.workspace_size; - - profile_list_.emplace_back(profile); +inline int64_t SumSize(const vector &size_list) { + int64_t sum_size = 0; + for (const int64_t &size : size_list) { + sum_size += size; } - - GELOGI("fusion task size: %zu, profile info size: %zu", op_id_map_.size(), profile_list_.size()); - return SUCCESS; + return sum_size; } Status DavinciModel::SinkModelProfile() { @@ -2149,12 +2253,18 @@ Status DavinciModel::SinkModelProfile() { auto &prof_mgr = ProfilingManager::Instance(); ReporterData reporter_data{}; // report model data 
tag name - std::string tag_name("model_load_info_" + std::to_string(this->Id())); + std::string tag_name; + tag_name.append("model_load_info_").append(std::to_string(this->Id())); GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, return FAILED, "Sink model tag memcpy error."); // Model Header - std::string name = om_name_.empty() ? name_ : om_name_; + string name; + if (!om_name_.empty()) { + name = om_name_; + } else { + name = name_; + } size_t name_len = name.size(); reporter_data.deviceId = device_id_; reporter_data.data = (unsigned char *)&name_len; @@ -2186,71 +2296,128 @@ Status DavinciModel::SinkModelProfile() { GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); + int32_t task_num = task_list_.size(); + std::multimap op_id_map; + std::set task_id_set; + for (int32_t i = 0; i < task_num; i++) { + auto task = task_list_[i]; + GE_CHECK_NOTNULL(task); + auto fusion_op_info = task->GetFusionOpInfo(); + // when type is RT_MODEL_TASK_KERNEL, ctx is not null + if (fusion_op_info != nullptr) { + uint32_t op_num = fusion_op_info->original_op_names.size(); + uint32_t task_id = task->GetTaskID(); + if (op_num > 0) { + GELOGI("task.id = %u, opNum = %u", task_id, op_num); + op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id)); + } + } + } + + struct memoryInfo { + int64_t input_size; + int64_t output_size; + int64_t weight_size; + int64_t workspace_size; + int64_t total_size; + + memoryInfo() : input_size(0), output_size(0), weight_size(0), workspace_size(0), total_size(0) {} + }; + using CIT = std::multimap::const_iterator; using Range = std::pair; - for (const ProfileInfo &profile : profile_list_) { - // op name after fusion - string fusion_op_name = profile.fusion_info.op_name; - int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); - reporter_data.data = (unsigned char *)&fusion_op_name_len; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - reporter_data.data = (unsigned char *)fusion_op_name.c_str(); - reporter_data.dataLen = fusion_op_name_len; - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // original op name before fusion - uint32_t op_num = profile.fusion_info.original_op_names.size(); - reporter_data.data = (unsigned char *)&op_num; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - for (uint32_t k = 0; k < op_num; k++) { - std::string op_name = profile.fusion_info.original_op_names[k]; - int32_t op_name_len = op_name.size() == 0 ? 
1 : op_name.size(); - reporter_data.data = (unsigned char *)&op_name_len; + for (int32_t i = 0; i < task_num; i++) { + auto task = task_list_[i]; + GE_CHECK_NOTNULL(task); + auto fusion_op_info = task->GetFusionOpInfo(); + if (fusion_op_info != nullptr && fusion_op_info->original_op_names.size() > 0) { + uint32_t task_id = task->GetTaskID(); + uint32_t op_num = fusion_op_info->original_op_names.size(); + uint32_t task_count = 0; + if (task_id_set.count(task_id) != 0) { + continue; + } + + uint32_t op_id = fusion_op_info->op_index; + Range range = op_id_map.equal_range(op_id); + for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { + task_count++; + uint32_t task_id = range_idx->second; + task_id_set.insert(task_id); + } + + // op name after fusion + string fusion_op_name = fusion_op_info->op_name; + int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); + reporter_data.data = (unsigned char *)&fusion_op_name_len; reporter_data.dataLen = sizeof(int32_t); GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); - reporter_data.data = (unsigned char *)op_name.c_str(); - reporter_data.dataLen = op_name_len; + + reporter_data.data = (unsigned char *)fusion_op_name.c_str(); + reporter_data.dataLen = fusion_op_name_len; + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + // original op name before fusion + reporter_data.data = (unsigned char *)&op_num; + reporter_data.dataLen = sizeof(int32_t); GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); - } - // stream id info - uint32_t streamId = profile.fusion_info.stream_id; - reporter_data.data = (unsigned char *)&streamId; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // memory info - reporter_data.data = (unsigned char *)&profile.memory_info; - reporter_data.dataLen = sizeof(profile.memory_info); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // task info - reporter_data.data = (unsigned char *)&profile.task_count; - reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); - for (CIT idx = task_range.first; idx != task_range.second; ++idx) { - uint32_t task_id = idx->second; - reporter_data.data = (unsigned char *)&task_id; + for (uint32_t k = 0; k < op_num; k++) { + std::string op_name = fusion_op_info->original_op_names[k]; + int32_t op_name_len = op_name.size() == 0 ? 
1 : op_name.size(); + reporter_data.data = (unsigned char *)&op_name_len; + reporter_data.dataLen = sizeof(int32_t); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + reporter_data.data = (unsigned char *)op_name.c_str(); + reporter_data.dataLen = op_name_len; + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + } + + // stream id info + uint32_t streamId = task->GetStreamId(); + reporter_data.data = (unsigned char *)&streamId; + reporter_data.dataLen = sizeof(int32_t); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + // memory info + struct memoryInfo memory_info; + uint32_t op_index = fusion_op_info->op_index; + auto iter = op_list_.find(op_index); + GE_CHK_BOOL_EXEC(iter != op_list_.end(), return FAILED, "index is out of range, index: %u", op_index); + auto op_desc = iter->second; + memory_info.input_size = SumSize(ModelUtils::GetInputSize(op_desc)); + memory_info.output_size = SumSize(ModelUtils::GetOutputSize(op_desc)); + memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc)); + memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc)); + memory_info.total_size = + memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; + reporter_data.data = (unsigned char *)&memory_info; + reporter_data.dataLen = sizeof(struct memoryInfo); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + // task info + reporter_data.data = (unsigned char *)&task_count; reporter_data.dataLen = sizeof(uint32_t); GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); + + Range task_range = op_id_map.equal_range(op_id); + for (CIT idx = task_range.first; idx != task_range.second; ++idx) { + uint32_t task_id = idx->second; + reporter_data.data = (unsigned char *)&task_id; + reporter_data.dataLen = sizeof(uint32_t); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + } } } - return SUCCESS; } @@ -2824,19 +2991,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector &inputs, const return SUCCESS; } -Status DavinciModel::UpdateKnownZeroCopyAddr(vector &total_io_addrs) { - for (size_t i = 0; i < total_io_addrs.size(); ++i) { - auto it_in = knonw_input_data_info_.find(total_io_addrs[i]); +Status DavinciModel::UpdateKnownZeroCopyAddr() { + for (size_t i = 0; i < total_io_addrs_.size(); ++i) { + auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]); if (it_in != knonw_input_data_info_.end()) { - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i], - knonw_input_data_info_.at(total_io_addrs[i])); - total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]); + GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], + knonw_input_data_info_.at(total_io_addrs_[i])); + total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]); } - auto it_out = knonw_output_data_info_.find(total_io_addrs[i]); + auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]); if (it_out != knonw_output_data_info_.end()) { - 
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i], - knonw_output_data_info_.at(total_io_addrs[i])); - total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]); + GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], + knonw_output_data_info_.at(total_io_addrs_[i])); + total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]); } } GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); @@ -2865,7 +3032,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec } else { total_io_addrs_ = orig_total_io_addrs_; } - GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed."); + GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed."); if (total_args_size_ == 0) { GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); @@ -2932,14 +3099,7 @@ Status DavinciModel::MallocKnownArgs() { GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } - // malloc dynamic and static hybrid memory - if (total_hybrid_args_size_ != 0) { - rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret); - } - } + // malloc fixed addr memory, eg: rts op if (total_fixed_addr_size_ != 0) { GELOGI("Begin to allocate fixed addr."); @@ -2993,7 +3153,9 @@ Status DavinciModel::DistributeTask() { } auto task_type = static_cast(task_def.type()); - bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL) && (task_type != RT_MODEL_TASK_KERNEL_EX); + bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL) + && (task_type != RT_MODEL_TASK_KERNEL_EX) + && (task_type != RT_MODEL_TASK_HCCL); GE_IF_BOOL_EXEC(no_need_profiling, continue); SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); @@ -3008,8 +3170,6 @@ Status DavinciModel::DistributeTask() { task_desc_info.block_dim = task_def.kernel().block_dim(); task_desc_info.task_id = task->GetTaskID(); task_desc_info.stream_id = task->GetStreamId(); - task_desc_info.shape_type = "static"; - task_desc_info.cur_iter_num = 0; task_desc_info_.emplace_back(task_desc_info); if (flag) { if (task->GetSktTaskID() != 0xFFFFFFFF) { @@ -3097,20 +3257,27 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector(args) + offset + i * kAddrLen; + SetBatchLabelAddr(op_desc, reinterpret_cast(args_val)); + } } for (auto &output_outside_addrs : new_output_outside_addrs_) { ZeroCopyOffset &output_outside = output_outside_addrs.second; - output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); + bool ret = output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); + if (ret) { + void *args_val = static_cast(args) + offset + i * kAddrLen; + SetBatchLabelAddr(op_desc, reinterpret_cast(args_val)); + } } } - - string batch_label; - if (!AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label) || batch_label.empty()) { + auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId()); + if (it == zero_copy_op_id_batch_label_.end()) { zero_copy_task.SetBatchLabel(kDefaultBatchLable); } else { - zero_copy_task.SetBatchLabel(batch_label); + zero_copy_task.SetBatchLabel(it->second); } std::lock_guard 
lock(outside_addrs_mutex_); @@ -3120,6 +3287,27 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vectorGetId()); + if (it == zero_copy_op_id_batch_label_.end()) { + return; + } + + const string &batch_label = it->second; + auto iter = zero_copy_batch_label_addrs_.find(batch_label); + if (iter != zero_copy_batch_label_addrs_.end()) { + iter->second.insert(addr); + GELOGD("[ZCPY] Set zero copy batch label and addrs success, batch label: %s, op name:%s.", batch_label.c_str(), + op_desc->GetName().c_str()); + } else { + set addrs = {addr}; + zero_copy_batch_label_addrs_.emplace(pair>(batch_label, addrs)); + GELOGD("[ZCPY] New added zero copy batch label and addrs success, batch label: %s, op name:%s.", + batch_label.c_str(), op_desc->GetName().c_str()); + } +} + /// /// @ingroup ge /// @brief Copy Check input size and model op size. @@ -3253,15 +3441,15 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & void *addr = data.second.GetDataInfo().at(count).second; void *buffer_addr = reinterpret_cast(reinterpret_cast(buffer.data) + data.second.GetRelativeOffset().at(count)); - GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p, batch_label: %s", - input_or_output.c_str(), data.first, addr, size, buffer_addr, batch_label.c_str()); + GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p", input_or_output.c_str(), + data.first, addr, size, buffer_addr); // For input data, just copy for rts task. for (ZeroCopyTask &task : zero_copy_tasks_) { if (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label) { continue; } uintptr_t addr_val = reinterpret_cast(addr); - if (task.UpdateTaskParam(addr_val, buffer_addr) != SUCCESS) { + if (task.UpdateTaskParam(addr_val, buffer_addr, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) { return FAILED; } } @@ -3623,6 +3811,9 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); is_dynamic_ = input_data.is_dynamic_batch; + if (!is_dynamic_) { + zero_copy_batch_label_addrs_.clear(); + } GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START)); Status ret = CopyModelData(input_data, output_data, is_dynamic_); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index be8efd90..19888e1f 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -76,20 +76,6 @@ struct timeInfo { int64_t dumpEndTime; }; -struct TaskMemInfo { - int64_t input_size{0}; - int64_t output_size{0}; - int64_t weight_size{0}; - int64_t workspace_size{0}; - int64_t total_size{0}; -}; - -struct ProfileInfo { - FusionOpInfo fusion_info; - TaskMemInfo memory_info; - uint32_t task_count{0}; -}; - enum ExecuteMode { INITIALIZATION, SYNCHRONIZATION, @@ -240,6 +226,8 @@ class DavinciModel { const vector &GetDataList() const { return data_op_list_; } // get Op + const map &GetOpList() const { return op_list_; } + OpDescPtr GetOpByIndex(uint32_t index) const { if (op_list_.find(index) == op_list_.end()) { return nullptr; @@ -448,6 +436,10 @@ class DavinciModel { int64_t GetLoadEndTime() { return load_end_time_; } + Status SinkModelProfile(); + + Status SinkTimeProfile(const InputData ¤t_data); + Status 
ReportProfilingData(); void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { @@ -484,14 +476,6 @@ class DavinciModel { void SetTotalIOAddrs(vector<void *> &io_addrs) { total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); } - void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; } - uint32_t GetHybridArgsSize() { - return total_hybrid_args_size_; - } - void *GetCurrentHybridArgsAddr(uint32_t offset) { - void *cur_args = static_cast<char *>(hybrid_addrs_) + offset; - return cur_args; - } void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); int64_t GetFixedAddrsSize(string tensor_name); void *GetCurrentFixedAddr(int64_t offset) const { @@ -510,7 +494,7 @@ class DavinciModel { Status MallocKnownArgs(); Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); - Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs); + Status UpdateKnownZeroCopyAddr(); void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); @@ -545,6 +529,15 @@ class DavinciModel { struct timeInfo time_info_; int32_t dataInputTid; + /// + /// @ingroup ge + /// @brief Save Batch label Info. + /// @param [in] const OpDescPtr &op_desc + /// @param [in] uintptr_t addr: address value in args block. + /// @return None. + /// + void SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr); + /// /// @ingroup ge /// @brief Copy Check input size and model op size. @@ -656,6 +649,14 @@ class DavinciModel { /// void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index); + /// + /// @ingroup ge + /// @brief input zero copy node Initialize. + /// @param [in] NodePtr: Data Op. + /// @return Status + /// + Status InitInputZeroCopyNodes(const NodePtr &node); + /// /// @ingroup ge /// @brief NetOutput Op Initialize. @@ -664,6 +665,30 @@ class DavinciModel { /// Status InitNetOutput(const NodePtr &node); + /// + /// @ingroup ge + /// @brief output zero copy node Initialize. + /// @param [in] NodePtr: Data Op. + /// @return Status + /// + Status InitOutputZeroCopyNodes(const NodePtr &node); + + /// + /// @ingroup ge + /// @brief input zero copy node Initialize for Case. + /// @param [in] NodePtr: Data Op. + /// @return Status + /// + Status InitInputBatchLabel(const NodePtr &node); + + /// + /// @ingroup ge + /// @brief output zero copy node Initialize for Case. + /// @param [in] NodePtr: netoutput Op. + /// @return Status + /// + Status InitOutputBatchLabel(const NodePtr &node); + /// /// @ingroup ge /// @brief Constant Op Init. @@ -812,11 +837,6 @@ class DavinciModel { void SetDataDumperArgs(const ComputeGraphPtr &compute_graph); - Status InitModelProfile(); - Status SinkModelProfile(); - - Status SinkTimeProfile(const InputData &current_data); - Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, std::vector<ge::OutputTensorInfo> &outputs); @@ -894,6 +914,11 @@ class DavinciModel { std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. std::set<const void *> copy_only_addrs_; // Address need copy to original place.
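// Aside: a minimal, self-contained sketch of the per-batch-label address check
// that the two members added below feed. It mirrors the CheckDynamicBatch()
// logic restored in zero_copy_task.cc further down; the free-function name
// AddrBelongsToBatch and the includes are illustrative, not part of the patch.
#include <cstdint>
#include <map>
#include <set>
#include <string>

// Spelling kept as in the patch ("Lable").
const char *const kDefaultBatchLable = "Batch_default";

// An address may be patched when it belongs to the requested batch label,
// to the default label, or to no label at all (static-shape task).
bool AddrBelongsToBatch(const std::map<std::string, std::set<uintptr_t>> &batch_addrs,
                        const std::string &batch_label, uintptr_t addr) {
  std::set<uintptr_t> dynamic_addrs;
  auto dyn_iter = batch_addrs.find(batch_label);
  if (dyn_iter != batch_addrs.end()) {
    dynamic_addrs = dyn_iter->second;
  }

  std::set<uintptr_t> fix_addrs;
  auto fix_iter = batch_addrs.find(kDefaultBatchLable);
  if (fix_iter != batch_addrs.end()) {
    fix_addrs = fix_iter->second;
  }

  // No default-label addresses registered: only the requested label constrains.
  if (fix_addrs.empty()) {
    return dynamic_addrs.empty() || dynamic_addrs.count(addr) != 0;
  }
  // Otherwise an address registered under the default label also passes.
  return dynamic_addrs.empty() || dynamic_addrs.count(addr) != 0 || fix_addrs.count(addr) != 0;
}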
+ // {op_id, batch_label} + std::map<int64_t, string> zero_copy_op_id_batch_label_; + // {batch_label, addrs} + std::map<string, set<uintptr_t>> zero_copy_batch_label_addrs_; + std::vector<TaskInfoPtr> task_list_; // rt_moodel_handle rtModel_t rt_model_handle_; @@ -952,8 +977,6 @@ class DavinciModel { void *args_ = nullptr; void *args_host_ = nullptr; void *fixed_addrs_ = nullptr; - void *hybrid_addrs_ = nullptr; - uint32_t total_hybrid_args_size_ = 0; int64_t total_fixed_addr_size_ = 0; std::map<const void *, void *> knonw_input_data_info_; std::map<const void *, void *> knonw_output_data_info_; @@ -993,9 +1016,6 @@ class DavinciModel { // key: input_index: input is merge node; value: each gear info and each output shape std::map<size_t, map<vector<int32_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_; std::vector<vector<int32_t>> all_gears_info_; - - std::multimap<uint32_t, uint32_t> op_id_map_; - std::vector<ProfileInfo> profile_list_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 0dbeb38e..4c2d4530 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -89,7 +89,6 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) { std::vector<uint64_t> v_aicpu_kernel; std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); - std::lock_guard<std::recursive_mutex> lock(map_mutex_); auto iter = model_aicpu_kernel_.find(model_key); if (iter != model_aicpu_kernel_.end()) { GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id); @@ -177,7 +176,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u } void ModelManager::DestroyAicpuSession(uint64_t session_id) { - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + std::lock_guard<std::mutex> lock(sess_ids_mutex_); auto it = sess_ids_.find(session_id); if (it == sess_ids_.end()) { GELOGI("The session: %lu not created.", session_id); @@ -206,7 +205,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { } ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + std::lock_guard<std::mutex> lock(map_mutex_); auto hybrid_davinci_model = hybrid_model_map_.find(model_id); if (hybrid_davinci_model != hybrid_model_map_.end()) { uint64_t session_id = hybrid_davinci_model->second->GetSessionId(); @@ -216,8 +215,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { auto it = model_map_.find(model_id); if (it == model_map_.end()) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); - return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; + GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); + return GE_EXEC_MODEL_ID_INVALID; } uint64_t session_id = it->second->GetSessionId(); DestroyAicpuSession(session_id); @@ -226,7 +225,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + std::lock_guard<std::mutex> lock(map_mutex_); std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); @@ -239,7 +238,7 @@ ge::Status
ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_ } ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + std::lock_guard<std::mutex> lock(map_mutex_); std::vector<uint64_t> v_aicpu_kernel; std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { @@ -251,7 +250,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i } ModelManager::~ModelManager() { - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + std::lock_guard<std::mutex> lock(map_mutex_); model_map_.clear(); model_aicpu_kernel_.clear(); cust_aicpu_so_.clear(); @@ -359,18 +358,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::GeRootModel> void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) { GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id); - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + std::lock_guard<std::mutex> lock(map_mutex_); model_map_[id] = davinci_model; } void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) { GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id); - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + std::lock_guard<std::mutex> lock(map_mutex_); hybrid_model_map_[id] = hybrid_model; } Status ModelManager::DeleteModel(uint32_t id) { - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + std::lock_guard<std::mutex> lock(map_mutex_); auto it = model_map_.find(id); auto hybrid_model_it = hybrid_model_map_.find(id); @@ -385,22 +384,22 @@ Status ModelManager::DeleteModel(uint32_t id) { } else if (hybrid_model_it != hybrid_model_map_.end()) { (void)hybrid_model_map_.erase(hybrid_model_it); } else { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); - return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; + GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); + return GE_EXEC_MODEL_ID_INVALID; } return SUCCESS; } std::shared_ptr<DavinciModel> ModelManager::GetModel(uint32_t id) { - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + std::lock_guard<std::mutex> lock(map_mutex_); auto it = model_map_.find(id); return (it == model_map_.end()) ? nullptr : it->second; } std::shared_ptr<hybrid::HybridDavinciModel> ModelManager::GetHybridModel(uint32_t id) { - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + std::lock_guard<std::mutex> lock(map_mutex_); auto it = hybrid_model_map_.find(id); return (it == hybrid_model_map_.end()) ?
nullptr : it->second; @@ -903,7 +902,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); davinci_model->SetModelDescVersion(new_model_desc); @@ -971,9 +970,8 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, } Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { - auto davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetCurShape Failed, Invalid Model ID %u!", model_id); + std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); + GE_CHECK_NOTNULL(davinci_model); davinci_model->GetCurShape(batch_info, dynamic_type); return SUCCESS; } @@ -986,8 +984,7 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami } std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetModelAttr Failed, Invalid Model ID %u!", model_id); + GE_CHECK_NOTNULL(davinci_model); davinci_model->GetModelAttr(dynamic_output_shape_info); return SUCCESS; } @@ -997,8 +994,9 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats) { std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", + model_id); + return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); } @@ -1013,14 +1011,18 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetAIPPInfo failed, invalid model_id is %u.", model_id); + "GetAIPPInfo failed, invalid model_id is %u.", + model_id); + return davinci_model->GetAIPPInfo(index, aipp_info); } Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetAIPPInfo failed, invalid model_id is %u.", model_id); + "GetAIPPInfo failed, invalid model_id is %u.", + model_id); + return davinci_model->GetAippType(index, type, aipp_index); } @@ -1053,15 +1055,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model mmTimespec timespec = mmGetTickCount(); ModelHelper model_helper; - Status ret = model_helper.LoadRootModel(model); - if (model_helper.GetModelType()) { - bool is_shape_unknown = false; - GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), - "CheckIsUnknownShape failed, model id:%u", model_id); - if (is_shape_unknown || GetContext().GetHostExecFlag()) { - return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener); - } - } +
Status ret = model_helper.LoadModel(model); if (ret != SUCCESS) { GELOGE(ret, "load model failed."); return ret; } @@ -1075,8 +1069,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed"); return ACL_ERROR_GE_MEMORY_ALLOCATION; } catch (...) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise"); - return ACL_ERROR_GE_MEMORY_ALLOCATION; + GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise"); + return INTERNAL_ERROR; } ret = davinci_model->Assign(ge_model); if (ret != SUCCESS) { @@ -1088,7 +1082,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model int32_t device_id = 0; rtError_t rt_ret = rtGetDevice(&device_id); if (rt_ret != RT_ERROR_NONE || device_id < 0) { - GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); + GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); return RT_ERROR_TO_GE_STATUS(rt_ret); } davinci_model->SetDeviceId(device_id); @@ -1220,7 +1214,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "Invalid model id %u, check whether model has been loaded or not.", model_id); + "Invalid model id %u, check weather model has been loaded or not.", model_id); if (davinci_model->NeedDestroyAicpuKernel()) { GELOGI("Start to destroy specified aicpu kernel."); @@ -1243,7 +1237,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy } Status ModelManager::CreateAicpuSession(uint64_t session_id) { - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + std::lock_guard<std::mutex> lock(sess_ids_mutex_); auto it = sess_ids_.find(session_id); // never been created by any model if (it == sess_ids_.end()) { @@ -1462,7 +1456,8 @@ void ModelManager::GenModelId(uint32_t *id) { if (id == nullptr) { return; } - std::lock_guard<std::recursive_mutex> lock(map_mutex_); + + std::lock_guard<std::mutex> lock(map_mutex_); *id = ++max_model_id_; } diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 99af8415..fc98d9c2 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -353,7 +353,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_; std::map<std::string, std::vector<uint64_t>> model_aicpu_kernel_; uint32_t max_model_id_; - std::recursive_mutex map_mutex_; + std::mutex map_mutex_; + std::mutex sess_ids_mutex_; std::mutex session_id_create_mutex_; static ::std::mutex exeception_infos_mutex_; uint64_t session_id_bias_; diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index ce31ef30..74faeb24 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -90,18 +90,20 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names; fusion_op_info_.op_name = op_desc_->GetName()); + string session_graph_model_id; + davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); + // get bin_file_key + const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(),
session_graph_model_id); // new aicpu kernel(rtCpuKernelLaunch) no need to check function if (kernel_type_ == ccKernelType::CCE_AI_CORE) { - rtError_t rt_ret = rtGetFunctionByName(const_cast(kernel_def.stub_func().c_str()), &stub_func_); + rtError_t rt_ret; + rt_ret = rtGetFunctionByName(const_cast(kernel_def.stub_func().c_str()), &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", kernel_def.stub_func().c_str()); return RT_ERROR_TO_GE_STATUS(rt_ret);); } else if (kernel_type_ == ccKernelType::TE) { - // get bin_file_key - string session_graph_model_id; - davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); - const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); - rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); + rtError_t rt_ret; + rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); return RT_ERROR_TO_GE_STATUS(rt_ret);); @@ -370,11 +372,7 @@ Status KernelTaskInfo::SuperKernelDistribute() { Status KernelTaskInfo::Distribute() { GELOGD("KernelTaskInfo Distribute Start."); if (davinci_model_->IsKnownNode()) { - if (kernel_type_ == ccKernelType::TE) { - args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); - } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { - args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); - } + args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); } rtError_t rt_ret = RT_ERROR_NONE; @@ -430,31 +428,36 @@ Status KernelTaskInfo::UpdateArgs() { const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); vector input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); vector output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); + vector workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); vector io_addrs; - io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); - io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); - if (kernel_type_ == ccKernelType::TE) { - vector workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); + if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { + io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); + io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); - davinci_model_->SetTotalIOAddrs(io_addrs); - } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { - davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); - uintptr_t io_addr = reinterpret_cast(args_addr.get()) + sizeof(aicpu::AicpuParamHead); - auto addrs_size = sizeof(uint64_t) * io_addrs.size(); - errno_t sec_ret = memcpy_s(reinterpret_cast(io_addr), addrs_size, io_addrs.data(), addrs_size); - if (sec_ret != EOK) { - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); - return FAILED; - } - // copy args to device - rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, 
ret: 0x%X", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret); + } else { + string peer_input_name; + if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) { + uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name); + if (output_index > output_data_addrs.size()) { + GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.", + output_data_addrs.size(), output_index); + return FAILED; + } + io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); + for (size_t i = 0; i < output_data_addrs.size(); ++i) { + if (i == output_index) { + void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); + io_addrs.emplace_back(fixed_addr); + continue; + } + io_addrs.emplace_back(output_data_addrs[i]); + } + io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); } } + davinci_model_->SetTotalIOAddrs(io_addrs); GELOGI("KernelTaskInfo::UpdateArgs success."); return SUCCESS; } @@ -530,18 +533,33 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { } Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { - const domi::KernelDef &kernel_def = task_def.kernel(); + domi::KernelDef kernel_def = task_def.kernel(); + uint32_t args_size = kernel_def.args_size(); + args_offset_ = davinci_model->GetTotalArgsSize(); + davinci_model->SetTotalArgsSize(args_size); + GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); + + // get opcontext stored in model const domi::KernelContext &context = kernel_def.context(); - kernel_type_ = static_cast(context.kernel_type()); - if (kernel_type_ == ccKernelType::TE) { - uint32_t args_size = kernel_def.args_size(); - args_offset_ = davinci_model->GetTotalArgsSize(); - davinci_model->SetTotalArgsSize(args_size); - GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); - } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { - hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); - davinci_model->SetHybridArgsSize(kernel_def.args_size()); - GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_); + // get opdesc + op_desc_ = davinci_model->GetOpByIndex(context.op_index()); + GE_CHECK_NOTNULL(op_desc_); + // alloc fixed addr + string peer_input_name; + if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { + uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); + if (output_index > op_desc_->GetOutputsSize()) { + GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(), + output_index); + return FAILED; + } + fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); + auto tensor_desc = op_desc_->GetOutputDesc(output_index); + int64_t tensor_size = 0; + GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); + davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); + GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size, + fixed_addr_offset_); } return SUCCESS; } @@ -870,7 +888,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } // copy args to new host memory - args_addr = std::unique_ptr(new (std::nothrow) uint8_t[args_size_]); + std::unique_ptr args_addr(new 
(std::nothrow) uint8_t[args_size_]); GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); if (sec_ret != EOK) { @@ -878,23 +896,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k return FAILED; } - auto aicpu_param_head = reinterpret_cast(args_addr.get()); - const auto &ext_info = kernel_def.kernel_ext_info(); - auto init_ret = InitAicpuTaskExtInfo(ext_info); - if (init_ret != SUCCESS) { - GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); - return init_ret; - } - GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), - op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); - - aicpu_param_head->extInfoAddr = reinterpret_cast(aicpu_ext_info_addr_); - aicpu_param_head->extInfoLength = static_cast(ext_info.size()); - - if (davinci_model_->IsKnownNode()) { - return SUCCESS; - } const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); + vector input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); vector output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); vector io_addrs; @@ -911,6 +914,19 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } } + auto aicpu_param_head = reinterpret_cast(args_addr.get()); + const auto &ext_info = kernel_def.kernel_ext_info(); + auto init_ret = InitAicpuTaskExtInfo(ext_info); + if (init_ret != SUCCESS) { + GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); + return init_ret; + } + GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), + op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); + + aicpu_param_head->extInfoAddr = reinterpret_cast(aicpu_ext_info_addr_); + aicpu_param_head->extInfoLength = static_cast(ext_info.size()); + // malloc device memory for args rtError_t rt_ret = rtMalloc(static_cast(&args_), args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index 7717edd3..1f90ede1 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -159,9 +159,7 @@ class KernelTaskInfo : public TaskInfo { OpDescPtr op_desc_; DavinciModel *davinci_model_; uint32_t args_offset_ = 0; - uint32_t hybrid_args_offset_ = 0; int64_t fixed_addr_offset_ = 0; - std::unique_ptr args_addr = nullptr; bool call_save_dump_ = false; // aicpu ext_info device mem diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/new_model_manager/zero_copy_offset.cc index f27d862d..9cd3f30b 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.cc +++ b/ge/graph/load/new_model_manager/zero_copy_offset.cc @@ -183,18 +183,22 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo addr_count_ = out_count; } -void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { +bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { const auto addr_val = reinterpret_cast(outside_addr); + bool set_batch_label_flag = false; for (uint32_t out_count = 0; out_count < 
GetAddrCount(); ++out_count) { - auto args_addrs = outside_addrs_[out_count].find(outside_addr); - if (args_addrs != outside_addrs_[out_count].end()) { + auto &addrs_mapping_list = GetOutsideAddrs(); + auto args_addrs = addrs_mapping_list[out_count].find(outside_addr); + if (args_addrs != addrs_mapping_list[out_count].end()) { GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid."); void *args_val = static_cast(args) + offset; args_addrs->second.push_back(args_val); GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, args, offset); + set_batch_label_flag = true; } } + return set_batch_label_flag; } } // namespace ge diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/new_model_manager/zero_copy_offset.h index 8ead742d..fa80f28b 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.h +++ b/ge/graph/load/new_model_manager/zero_copy_offset.h @@ -51,7 +51,7 @@ class ZeroCopyOffset { const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag); void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, std::vector &tensor_addrs); - void SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset); + bool SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset); // basic_addr of l2-fusion void *GetBasicAddr() const { return basic_addr_; } diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc index b938f14b..98dccb3c 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -22,6 +22,8 @@ #include "common/ge_compiler_options.h" namespace ge { +const char *const kDefaultBatchLable = "Batch_default"; + ZeroCopyTask::ZeroCopyTask(const string &name, uint8_t *args, size_t size) : name_(name), args_addr_(args), args_size_(size), is_updated_(false) {} @@ -64,18 +66,59 @@ void ZeroCopyTask::SetOriginalArgs(const void *info, size_t size) { const uint8_t *data = static_cast(info); args_info_.assign(data, data + size); - GELOGI("[ZCPY] %s set original args info: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info, + GELOGI("[ZCPY] %s set info from virtual_addr: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info, args_addr_, args_size_, size); } +/** + * @ingroup ge + * @brief Check is dynamic batch node. + * @param [in] addr: virtual address value from Op. + * @param [in] data: data buffer from user. + * @param [in] batch_addrs: dynamic batch addr info. + * @param [in] batch_label: batch label. 
+ * @return: true / false + */ +bool ZeroCopyTask::CheckDynamicBatch(const map> &batch_addrs, const string &batch_label, + uintptr_t addr) { + // Used for dynamic batch / resolution scene + set dynamic_input_addrs; + auto dynamic_input_iter = batch_addrs.find(batch_label); + if (dynamic_input_iter != batch_addrs.end()) { + dynamic_input_addrs = dynamic_input_iter->second; + } + + set fix_input_addrs; + auto fix_input_iter = batch_addrs.find(kDefaultBatchLable); + if (fix_input_iter != batch_addrs.end()) { + fix_input_addrs = fix_input_iter->second; + } + + if (fix_input_addrs.empty()) { + if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end()) { + return false; + } + } else { + if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end() && + fix_input_addrs.find(addr) == fix_input_addrs.end()) { + return false; + } + } + + return true; +} + /** * @ingroup ge * @brief Set user data addr to Task param. * @param [in] addr: virtual address value from Op. * @param [in] buffer_addr: real_data_buffer_addr from user. + * @param [in] batch_addrs: dynamic batch addr info. + * @param [in] batch_label: batch label. * @return: void */ -Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr) { +Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map> &batch_addrs, + const string &batch_label) { auto iter = task_addr_offset_.find(addr); if (iter != task_addr_offset_.end()) { auto &cur_pair = *iter; diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/new_model_manager/zero_copy_task.h index efabc814..d0bb2b6d 100644 --- a/ge/graph/load/new_model_manager/zero_copy_task.h +++ b/ge/graph/load/new_model_manager/zero_copy_task.h @@ -67,9 +67,12 @@ class ZeroCopyTask { * @brief Set user data addr to Task param. * @param [in] addr: virtual address value from Op. * @param [in] buffer_addr: data buffer_addr from user. + * @param [in] batch_addrs: dynamic batch addr info. + * @param [in] batch_label: batch label. 
* @return: 0 SUCCESS / others FAILED */ - ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr); + ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map> &batch_addrs, + const string &batch_label); /** * @ingroup ge @@ -88,6 +91,9 @@ class ZeroCopyTask { return batch_label_; } + protected: + bool CheckDynamicBatch(const map> &batch_addrs, const string &batch_label, uintptr_t addr); + private: const string name_; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 171f233f..bd476ad5 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -23,15 +23,25 @@ #include #include #include +#include +#include "common/ge/ge_util.h" #include "common/math/math_util.h" #include "common/thread_pool.h" +#include "common/util.h" +#include "external/graph/types.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/ge_types.h" #include "analyzer/analyzer.h" #include "graph/common/ge_call_wrapper.h" #include "graph/common/local_context.h" #include "graph/common/transop_util.h" +#include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/ge_global_options.h" +#include "graph/ge_local_context.h" +#include "graph/manager/graph_mem_allocator.h" #include "graph/manager/util/rt_context_util.h" #include "graph/partition/dynamic_shape_partition.h" #include "graph/passes/enter_pass.h" @@ -51,6 +61,8 @@ #include "graph/passes/dimension_adjust_pass.h" #include "graph/passes/dimension_compute_pass.h" #include "graph/passes/flow_ctrl_pass.h" +#include "graph/passes/hccl_group_pass.h" +#include "graph/passes/hccl_memcpy_pass.h" #include "graph/passes/identity_pass.h" #include "graph/passes/input_output_connection_identify_pass.h" #include "graph/passes/iterator_op_pass.h" @@ -65,7 +77,7 @@ #include "graph/passes/permute_pass.h" #include "graph/passes/prune_pass.h" #include "graph/passes/ref_identity_delete_op_pass.h" -#include "graph/passes/remove_same_const_pass.h" +#include "graph/passes/replace_with_empty_const_pass.h" #include "graph/passes/reshape_recovery_pass.h" #include "graph/passes/reshape_remove_pass.h" #include "graph/passes/same_transdata_breadth_fusion_pass.h" @@ -75,12 +87,13 @@ #include "graph/passes/switch_logic_remove_pass.h" #include "graph/passes/switch_to_stream_switch_pass.h" #include "graph/passes/transop_breadth_fusion_pass.h" +#include "graph/passes/transop_depth_fusion_pass.h" #include "graph/passes/transop_nearby_allreduce_fusion_pass.h" #include "graph/passes/transop_symmetry_elimination_pass.h" #include "graph/passes/transop_without_reshape_fusion_pass.h" #include "graph/passes/transpose_transdata_pass.h" -#include "graph/passes/useless_control_out_remove_pass.h" #include "graph/passes/variable_op_pass.h" +#include "graph/passes/variable_prepare_op_pass.h" #include "graph/passes/variable_ref_delete_op_pass.h" #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" #include "graph/passes/end_of_sequence_add_control_pass.h" @@ -91,6 +104,9 @@ #include "graph/passes/memcpy_addr_async_pass.h" #include "graph/build/label_allocator.h" #include "graph/utils/tensor_adapter.h" +#include "graph/utils/type_utils.h" +#include "graph/graph_util.h" +#include "graph/types.h" #include "inc/pass_manager.h" #include "init/gelib.h" #include "ir_build/atc_ir_common.h" @@ -534,8 +550,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr (void) 
AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, - compute_graph->GetGraphID(), subgraph, - compute_graph->GetName(), session_id, + compute_graph->GetGraphID(), subgraph, compute_graph, session_id, GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); @@ -550,8 +565,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, - compute_graph->GetGraphID(), subgraph, - compute_graph->GetName(), session_id, + compute_graph->GetGraphID(), subgraph, compute_graph, session_id, GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); @@ -2134,7 +2148,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { TransposeTransDataPass transpose_transdata_pass; TransOpSymmetryEliminationPass symmetry_elimination_pass; DimensionComputePass dimension_compute_pass; - UselessControlOutRemovePass useless_control_out_remove_pass; names_to_passes.emplace_back("EnterPass", &enter_pass); names_to_passes.emplace_back("AddNPass", &addn_pass); names_to_passes.emplace_back("SwitchDeadBranchElimination", &switch_dead_branch_elimination); @@ -2148,7 +2161,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); - names_to_passes.emplace_back("UselessControlOutRemovePass", &useless_control_out_remove_pass); GE_TIMESTAMP_START(names_to_passes); ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "GraphManager::OptimizeStage1_2"); @@ -2189,8 +2201,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass", new (std::nothrow) VariableRefUselessControlOutDeletePass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ReshapeRecoveryPass", new (std::nothrow) ReshapeRecoveryPass)) - GE_CHK_STATUS_RET( - graph_pass.AddPass("OptimizeStage1_3::RemoveSameConstPass", new (std::nothrow) RemoveSameConstPass)) if (options_.train_graph_flag) { // Priority: The GlobalStepInsertPass should work before graph partitioner. 
// Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory @@ -2461,8 +2471,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, const SubGraphInfoPtr &sub_graph_info_ptr, - const std::string &root_graph_name, - uint64_t session_id, + const ComputeGraphPtr &compute_graph, uint64_t session_id, const GEThreadLocalContext &ge_context) { if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) { GetContext().SetSessionId(session_id); @@ -2479,13 +2488,9 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_ID for subgraph, graph_id: %u.", root_graph_id); return FAILED; } - if (!AttrUtils::SetStr(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_NAME, root_graph_name)) { - GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_NAME for subgraph, \ - root_graph_name: %s.", root_graph_name.c_str()); - return FAILED; - } compute_graph_tmp->SetSessionID(session_id); Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp, + compute_graph, engine_name); if (ret != SUCCESS) { GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str()); diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index d2887c4c..feca02fc 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -219,8 +219,7 @@ class GraphManager { static Status ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, const SubGraphInfoPtr &sub_graph_info_ptr, - const std::string &root_graph_name, - uint64_t session_id, + const ComputeGraphPtr &compute_graph, uint64_t session_id, const GEThreadLocalContext &ge_context); Status ParseInputsDims(const std::vector &input_tensor); void ParseInputsDimsForData(const std::vector &input_tensor); diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index 7ee7df20..b832986b 100755 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -16,7 +16,10 @@ #include "graph/manager/graph_mem_allocator.h" +#include #include + +#include "framework/common/debug/ge_log.h" #include "graph/manager/graph_caching_allocator.h" #include "graph/manager/rdma_pool_allocator.h" diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index c94408de..c5ebfda6 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -76,7 +76,8 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) { } } -Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std::string &engine_name) { +Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const ComputeGraphPtr &parent_graph, + const std::string &engine_name) { if (compute_graph == nullptr) { GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeSubGraph]: compute_graph is nullptr."); return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; @@ -105,6 +106,10 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { Status ret = (*iter)->OptimizeFusedGraphAfterGraphSlice(*(compute_graph)); if (ret != SUCCESS) { + auto root_graph = ge::GraphUtils::FindRootGraph(parent_graph); + if (root_graph != nullptr) { 
+ ErrorManager::GetInstance().SaveMstuneCompileFailedMsg(root_graph->GetName()); + } GELOGE(ret, "[OptimizeSubGraph][OptimizeFusedGraphAfterGraphSlice]: graph optimize failed, ret:%d", ret); return ret; } diff --git a/ge/graph/optimize/graph_optimize.h b/ge/graph/optimize/graph_optimize.h index 78d580b7..969b4720 100755 --- a/ge/graph/optimize/graph_optimize.h +++ b/ge/graph/optimize/graph_optimize.h @@ -42,7 +42,8 @@ class GraphOptimize { ~GraphOptimize() = default; // subgraph optimize - Status OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std::string &engine_name); + Status OptimizeSubGraph(ComputeGraphPtr &compute_graph, const ComputeGraphPtr &parent_graph, + const std::string &engine_name); // original graph optimize Status OptimizeOriginalGraph(ComputeGraphPtr &compute_graph); diff --git a/ge/graph/passes/attach_stream_label_pass.cc b/ge/graph/passes/attach_stream_label_pass.cc index cd3509c7..c0e0f669 100644 --- a/ge/graph/passes/attach_stream_label_pass.cc +++ b/ge/graph/passes/attach_stream_label_pass.cc @@ -18,8 +18,6 @@ #include "ge/ge_api_types.h" #include "graph/common/omg_util.h" -using std::string; - namespace ge { Status AttachStreamLabelPass::Run(ComputeGraphPtr graph) { GELOGD("AttachStreamLabelPass Enter."); @@ -189,10 +187,21 @@ Status AttachStreamLabelPass::UpdateEnterNode() { } std::stack enter_nodes; + std::string batch_label; for (const auto &enter_node : pair.second) { enter_nodes.emplace(enter_node); + std::string tmp_label; + (void)AttrUtils::GetStr(enter_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); + if (!tmp_label.empty()) { + if (batch_label.empty()) { + batch_label = tmp_label; + } else if (batch_label != tmp_label) { + GELOGE(FAILED, "multi batch_label exist, label1=%s, label2=%s.", batch_label.c_str(), tmp_label.c_str()); + return FAILED; + } + } } - if (UpdateLoopBranch(enter_nodes, active_label_list[0]) != SUCCESS) { + if (UpdateLoopBranch(enter_nodes, active_label_list[0], batch_label) != SUCCESS) { GELOGE(FAILED, "Update stream_label for loop_branch failed."); return FAILED; } @@ -217,7 +226,10 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no } for (const auto &enter_node : enter_nodes) { - GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed."); + GE_CHECK_NOTNULL(enter_node->GetOpDesc()); + if (enter_node->GetOpDesc()->HasAttr(ATTR_NAME_STREAM_LABEL)) { + GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed."); + } } return SUCCESS; } @@ -229,7 +241,8 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no /// @param [in] batch_label /// @return Status /// -Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_nodes, const string &stream_label) { +Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label, + const std::string &batch_label) { std::stack nodes(enter_nodes); NodePtr cur_node = nullptr; while (!nodes.empty()) { @@ -238,6 +251,11 @@ Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_ for (const NodePtr &out_node : cur_node->GetOutAllNodes()) { OpDescPtr out_desc = out_node->GetOpDesc(); GE_CHECK_NOTNULL(out_desc); + std::string tmp_label; + (void)AttrUtils::GetStr(out_desc, ATTR_NAME_BATCH_LABEL, tmp_label); + if (!tmp_label.empty() && (tmp_label != batch_label)) { + continue; + } std::string out_type = out_desc->GetType(); bool need_skip = out_desc->HasAttr(ATTR_NAME_STREAM_LABEL) || (out_type == ENTER) || (out_type == 
REFENTER) || diff --git a/ge/graph/passes/attach_stream_label_pass.h b/ge/graph/passes/attach_stream_label_pass.h index ad71d58f..19f11480 100755 --- a/ge/graph/passes/attach_stream_label_pass.h +++ b/ge/graph/passes/attach_stream_label_pass.h @@ -58,9 +58,11 @@ class AttachStreamLabelPass : public GraphPass { /// @brief Update stream_label for loop_branch /// @param [in] enter_nodes /// @param [in] stream_label + /// @param [in] batch_label /// @return Status /// - static Status UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label); + static Status UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label, + const std::string &batch_label); /// /// @brief Update stream_label start with enter nodes diff --git a/ge/graph/passes/base_pass.cc b/ge/graph/passes/base_pass.cc index 3b854c18..68efbeb9 100755 --- a/ge/graph/passes/base_pass.cc +++ b/ge/graph/passes/base_pass.cc @@ -96,7 +96,7 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder node->GetName().c_str(), node->GetType().c_str()); continue; } - if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) { + if (node_to_re_pass->IsAllInNodesSeen(nodes_seen)) { GELOGD("The node %s will be re-pass later", node_to_re_pass->GetName().c_str()); nodes_re_pass.insert(node_to_re_pass); } else { diff --git a/ge/graph/passes/dimension_adjust_pass.cc b/ge/graph/passes/dimension_adjust_pass.cc index 5701faf5..fc5fe69f 100755 --- a/ge/graph/passes/dimension_adjust_pass.cc +++ b/ge/graph/passes/dimension_adjust_pass.cc @@ -80,71 +80,7 @@ Status DimensionAdjustPass::Run(ge::NodePtr &node) { } } - ret = DealWithInNodes(node); - if (ret != SUCCESS) { - GELOGE(ret, "DealWithInNodes of %s failed.", node->GetName().c_str()); - return ret; - } - std::vector data_relink_io_map = {kDataInputIndex}; return IsolateAndDeleteNode(node, data_relink_io_map); } - -Status DimensionAdjustPass::DealWithInNodes(NodePtr &node) { - GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(node->GetOpDesc()); - auto graph = node->GetOwnerComputeGraph(); - auto in_data_anchors = node->GetAllInDataAnchors(); - for (auto &in_data_anchor : in_data_anchors) { - if (in_data_anchor == nullptr) { - continue; - } - auto in_node_anchor = in_data_anchor->GetPeerOutAnchor(); - if (in_node_anchor == nullptr) { - continue; - } - auto in_node = in_node_anchor->GetOwnerNode(); - if (in_node->GetType() == SWITCHN) { - auto identity_name = node->GetName() + "_ctrl_identity_" + std::to_string(in_data_anchor->GetIdx()); - auto identity = - AddIdentityNodeToGraph(identity_name, node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()), graph); - GE_CHECK_NOTNULL(identity); - GELOGI("Create new identity node[%s] after node %s[type: %s] success.", identity->GetName().c_str(), - in_node->GetName().c_str(), in_node->GetType().c_str()); - GE_CHK_STATUS_RET(GraphUtils::AddEdge(in_node_anchor, identity->GetInDataAnchor(0))) - GE_CHECK_NOTNULL(identity->GetOutControlAnchor()); - if (identity->GetOutControlAnchor()->IsLinkedWith(node->GetInControlAnchor())) { - continue; - } - GE_CHK_STATUS_RET(GraphUtils::AddEdge(identity->GetOutControlAnchor(), node->GetInControlAnchor())) - } - } - - return SUCCESS; -} - -NodePtr DimensionAdjustPass::AddIdentityNodeToGraph(const string &name, const GeTensorDesc &tensor, - ComputeGraphPtr &graph) { - if (graph == nullptr) { - GELOGE(INTERNAL_ERROR, "Comput graph ptr is null in creating identity node."); - return nullptr; - } - - OpDescPtr desc = MakeShared("", 
""); - if (desc == nullptr) { - GELOGE(MEMALLOC_FAILED, "Failed to create op desc."); - return nullptr; - } - - desc->SetName(name); - desc->SetType(IDENTITY); - auto ret = desc->AddInputDesc(tensor); - auto ret2 = desc->AddOutputDesc(tensor); - if ((ret != GRAPH_SUCCESS) || (ret2 != GRAPH_SUCCESS)) { - GELOGE(INTERNAL_ERROR, "Failed to add input/output desc in creating identity."); - return nullptr; - } - - return graph->AddNodeFront(desc); -} } // namespace ge diff --git a/ge/graph/passes/dimension_adjust_pass.h b/ge/graph/passes/dimension_adjust_pass.h index 7766f140..685d9694 100755 --- a/ge/graph/passes/dimension_adjust_pass.h +++ b/ge/graph/passes/dimension_adjust_pass.h @@ -34,10 +34,6 @@ namespace ge { class DimensionAdjustPass : public BaseNodePass { public: Status Run(ge::NodePtr &node) override; - - private: - Status DealWithInNodes(ge::NodePtr &node); - NodePtr AddIdentityNodeToGraph(const std::string &name, const GeTensorDesc &tensor, ComputeGraphPtr &graph); }; } // namespace ge diff --git a/ge/graph/passes/enter_pass.cc b/ge/graph/passes/enter_pass.cc index 066c97cf..afeca78f 100644 --- a/ge/graph/passes/enter_pass.cc +++ b/ge/graph/passes/enter_pass.cc @@ -23,7 +23,6 @@ namespace { const size_t kOutNodesNum = 1; -const size_t kInCtrlNodesNum = 1; } namespace ge { @@ -56,7 +55,6 @@ Status EnterPass::Run(NodePtr &node) { if (out_ctrl_node == nullptr) { continue; } - GELOGI("Remove control edge from %s to %s.", node->GetName().c_str(), out_ctrl_node->GetName().c_str()); if (GraphUtils::RemoveEdge(node->GetOutControlAnchor(), out_ctrl_node->GetInControlAnchor()) != GRAPH_SUCCESS) { GELOGE(FAILED, "Remove Enter ctrl output fail, %s->%s", node->GetName().c_str(), out_ctrl_node->GetName().c_str()); @@ -64,12 +62,8 @@ Status EnterPass::Run(NodePtr &node) { } } } else { - if (OptimizeEnterWithOnlyDataOut(node, in_node) != SUCCESS) { - GELOGE(FAILED, "Optimize enter node[%s] with only out data node failed.", node->GetName().c_str()); - return FAILED; - } - if (UnlinkCtrlEdgeBeforeConst(node) != SUCCESS) { - GELOGE(FAILED, "Unlink control edge before const of node[%s]'s out nodes failed.", node->GetName().c_str()); + if (OptimizeEnter(node, in_node) != SUCCESS) { + GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str()); return FAILED; } } @@ -78,7 +72,7 @@ Status EnterPass::Run(NodePtr &node) { return SUCCESS; } -Status EnterPass::OptimizeEnterWithOnlyDataOut(NodePtr &node, NodePtr &in_node) { +Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { if ((in_node->GetOutAllNodes().size() != kOutNodesNum) || !node->GetOutControlNodes().empty()) { return SUCCESS; } @@ -89,61 +83,17 @@ Status EnterPass::OptimizeEnterWithOnlyDataOut(NodePtr &node, NodePtr &in_node) } GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0)); - GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))) + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))); const auto &out_data_anchor = node->GetOutDataAnchor(0); GE_CHECK_NOTNULL(out_data_anchor); for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)) - GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)) + GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)); + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)); } - GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node)) + 
GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node)); AddNodeDeleted(node); AddRePassNodesWithInOut(in_node); return SUCCESS; } - -Status EnterPass::UnlinkCtrlEdgeBeforeConst(NodePtr &node) { - auto out_ctrl_nodes = node->GetOutControlNodes(); - if (out_ctrl_nodes.empty()) { - return SUCCESS; - } - auto out_ctrl_anchor = node->GetOutControlAnchor(); - GE_CHECK_NOTNULL(out_ctrl_anchor); - - for (auto &out_ctrl_node : out_ctrl_nodes) { - GE_CHECK_NOTNULL(out_ctrl_node); - if ((out_ctrl_node->GetType() != CONSTANT) && (out_ctrl_node->GetType() != CONSTANTOP)) { - continue; - } - auto in_ctrl_nodes = out_ctrl_node->GetInControlNodes(); - if (in_ctrl_nodes.size() != kInCtrlNodesNum) { - continue; - } - - // Skip when has merge out - bool has_merge_out = false; - auto out_nodes_of_const = out_ctrl_node->GetOutAllNodes(); - for (const auto &out_node_of_const : out_nodes_of_const) { - GE_CHECK_NOTNULL(out_node_of_const); - if (out_node_of_const->GetType() == MERGE || out_node_of_const->GetType() == REFMERGE) { - has_merge_out = true; - break; - } - } - if (has_merge_out) { - continue; - } - - GELOGI("Unlink control edge from %s to %s.", node->GetName().c_str(), out_ctrl_node->GetName().c_str()); - GE_CHK_STATUS_RET(out_ctrl_anchor->Unlink(out_ctrl_node->GetInControlAnchor())) - for (auto &out_node_of_const : out_nodes_of_const) { - if (!out_ctrl_anchor->IsLinkedWith(out_node_of_const->GetInControlAnchor())) { - GELOGI("Link control edge from %s to %s.", node->GetName().c_str(), out_node_of_const->GetName().c_str()); - GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(out_node_of_const->GetInControlAnchor())) - } - } - } - return SUCCESS; -} } // namespace ge diff --git a/ge/graph/passes/enter_pass.h b/ge/graph/passes/enter_pass.h index 1417b1f0..677516ff 100644 --- a/ge/graph/passes/enter_pass.h +++ b/ge/graph/passes/enter_pass.h @@ -25,8 +25,7 @@ class EnterPass : public BaseNodePass { Status Run(NodePtr &node) override; private: - Status OptimizeEnterWithOnlyDataOut(NodePtr &node, NodePtr &in_node); - Status UnlinkCtrlEdgeBeforeConst(NodePtr &node); + Status OptimizeEnter(NodePtr &node, NodePtr &in_node); }; } // namespace ge #endif // GE_GRAPH_PASSES_ENTER_PASS_H_ diff --git a/ge/graph/passes/folding_pass.cc b/ge/graph/passes/folding_pass.cc index 227a0f61..93dc2c40 100755 --- a/ge/graph/passes/folding_pass.cc +++ b/ge/graph/passes/folding_pass.cc @@ -173,7 +173,10 @@ Status FoldingPass::DealWithInNodes(NodePtr &node) { continue; } auto in_node = in_node_anchor->GetOwnerNode(); - if ((in_node->GetType() == SWITCH) || (in_node->GetType() == REFSWITCH) || (in_node->GetType() == SWITCHN)) { + if (in_node == nullptr) { + continue; + } + if ((in_node->GetType() == SWITCH) || (in_node->GetType() == REFSWITCH)) { GELOGI("The in_node name is %s, and node type is %s.", in_node->GetName().c_str(), in_node->GetType().c_str()); auto ret = in_node_anchor->Unlink(in_data_anchor); if (ret != SUCCESS) { diff --git a/ge/graph/passes/merge_to_stream_merge_pass.cc b/ge/graph/passes/merge_to_stream_merge_pass.cc index c1a57a61..103fbb1b 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.cc +++ b/ge/graph/passes/merge_to_stream_merge_pass.cc @@ -89,6 +89,16 @@ Status MergeToStreamMergePass::ReplaceMergeNode(const ComputeGraphPtr &graph, co GE_CHK_STATUS_RET(SetNextIteration(stream_merge, next_iteration_name), "Set next iteration failed"); } + if (merge_op_desc->HasAttr(ATTR_NAME_BATCH_LABEL)) { + string batch_label; + (void)AttrUtils::GetStr(merge_op_desc, ATTR_NAME_BATCH_LABEL, 
batch_label); + if (!batch_label.empty()) { + auto stream_merge_desc = stream_merge->GetOpDesc(); + GE_CHECK_NOTNULL(stream_merge_desc); + (void)AttrUtils::SetStr(stream_merge_desc, ATTR_NAME_BATCH_LABEL, batch_label); + } + } + return AddActiveNodes(graph, stream_merge); }
diff --git a/ge/graph/passes/next_iteration_pass.cc b/ge/graph/passes/next_iteration_pass.cc index cf46f09d..d8c4779d 100644 --- a/ge/graph/passes/next_iteration_pass.cc +++ b/ge/graph/passes/next_iteration_pass.cc @@ -19,8 +19,6 @@ #include "common/ge/ge_util.h" #include "graph/common/omg_util.h" -using std::string; - namespace ge { Status NextIterationPass::Run(ComputeGraphPtr graph) { GELOGD("NextIterationPass Enter"); @@ -37,6 +35,10 @@ Status NextIterationPass::Run(ComputeGraphPtr graph) { return INTERNAL_ERROR; } } + if (GroupWithNoBatch(graph) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Group enter_nodes failed without batch_label attr."); + return INTERNAL_ERROR; + } if (FindWhileGroups() != SUCCESS) { GELOGE(INTERNAL_ERROR, "Find while groups failed."); @@ -71,22 +73,75 @@ Status NextIterationPass::GroupEnterNode(const NodePtr &enter_node) { return FAILED; } - string batch_label; - if (ge::AttrUtils::GetStr(enter_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { - frame_name += batch_label; + std::string batch_label; + (void)ge::AttrUtils::GetStr(enter_desc, ATTR_NAME_BATCH_LABEL, batch_label); + if (batch_label.empty()) { + auto frame_iter = frame_enter_map_.find(frame_name); + if (frame_iter == frame_enter_map_.end()) { + std::vector<NodePtr> enter_nodes; + enter_nodes.emplace_back(enter_node); + frame_enter_map_[frame_name] = enter_nodes; + } else { + frame_iter->second.emplace_back(enter_node); + } + return SUCCESS; } - auto iter = loop_group_map_.find(frame_name); - if (iter == loop_group_map_.end()) { + auto group_iter = loop_group_map_.find(frame_name); + if (group_iter == loop_group_map_.end()) { LoopCondGroupPtr loop_group = MakeShared<LoopCondGroup>(); if (loop_group == nullptr) { GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); return FAILED; } loop_group->enter_nodes.emplace_back(enter_node); - loop_group_map_[frame_name] = loop_group; + loop_group_map_[frame_name][batch_label] = loop_group; } else { - iter->second->enter_nodes.emplace_back(enter_node); + auto batch_iter = group_iter->second.find(batch_label); + if (batch_iter == group_iter->second.end()) { + LoopCondGroupPtr loop_group = MakeShared<LoopCondGroup>(); + if (loop_group == nullptr) { + GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); + return FAILED; + } + loop_group->enter_nodes.emplace_back(enter_node); + group_iter->second[batch_label] = loop_group; + } else { + batch_iter->second->enter_nodes.emplace_back(enter_node); + } + } + + return SUCCESS; +}
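// Illustrative sketch (not part of the patch): the two-level grouping that
// GroupEnterNode above builds. Enter nodes are keyed first by frame_name and
// then by batch_label, so every dynamic-batch branch of the same while-frame
// ends up in its own LoopCondGroup. "EnterInfo" and "Group" are hypothetical
// stand-ins for GE's NodePtr and LoopCondGroup.
#include <map>
#include <memory>
#include <string>
#include <vector>

struct EnterInfo {
  std::string frame_name;   // value of ENTER_ATTR_FRAME_NAME
  std::string batch_label;  // value of ATTR_NAME_BATCH_LABEL, may be empty
};

struct Group {
  std::vector<EnterInfo> enter_nodes;
};

using GroupMap = std::map<std::string, std::map<std::string, std::shared_ptr<Group>>>;

void AddToGroup(GroupMap &groups, const EnterInfo &enter) {
  // operator[] default-constructs the inner map and the shared_ptr on first use
  std::shared_ptr<Group> &group = groups[enter.frame_name][enter.batch_label];
  if (group == nullptr) {
    group = std::make_shared<Group>();
  }
  group->enter_nodes.push_back(enter);
}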
+ +/// +/// @brief Group Enter nodes without batch_label attr +/// @param [in] compute_graph +/// @return Status +/// +Status NextIterationPass::GroupWithNoBatch(const ComputeGraphPtr &graph) { + if (frame_enter_map_.empty()) { + GELOGI("All enter nodes in graph %s has batch_label attr.", graph->GetName().c_str()); + return SUCCESS; + } + for (const auto &item : frame_enter_map_) { + const std::string &frame_name = item.first; + auto iter = loop_group_map_.find(frame_name); + if (iter == loop_group_map_.end()) { + LoopCondGroupPtr loop_group = MakeShared<LoopCondGroup>(); + if (loop_group == nullptr) { + GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); + return FAILED; + } + loop_group->enter_nodes = item.second; + loop_group_map_[frame_name][""] = loop_group; + } else { + for (auto &batch_item : iter->second) { + for (const auto &enter_node : item.second) { + batch_item.second->enter_nodes.emplace_back(enter_node); + } + } + } } return SUCCESS; @@ -99,39 +154,55 @@ Status NextIterationPass::GroupEnterNode(const NodePtr &enter_node) { Status NextIterationPass::FindWhileGroups() { for (const auto &loop_group_iter : loop_group_map_) { const std::string &frame_name = loop_group_iter.first; - for (const auto &enter_node : loop_group_iter.second->enter_nodes) { - for (const auto &out_node : enter_node->GetOutAllNodes()) { - const string &type = out_node->GetType(); - if ((type != MERGE) && (type != REFMERGE)) { - continue; - } - - NodePtr next_node = nullptr; - if (FindTargetNode(out_node, NEXTITERATION, true, next_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get NextIteration node failed, frame_name: %s", frame_name.c_str()); - return INTERNAL_ERROR; - } - loop_group_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node)); - - NodePtr switch_node = nullptr; - if (FindTargetNode(out_node, SWITCH, false, switch_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get Switch node failed, frame_name: %s.", frame_name.c_str()); - return INTERNAL_ERROR; - } - if (switch_node == nullptr) { - continue; - } - - NodePtr loop_cond = nullptr; - if (FindTargetNode(switch_node, LOOPCOND, true, loop_cond) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get LoopCond node failed, frame_name: %s.", frame_name.c_str()); - return INTERNAL_ERROR; - } - if (loop_group_iter.second->loop_cond == nullptr) { - loop_group_iter.second->loop_cond = loop_cond; - } else if (loop_group_iter.second->loop_cond != loop_cond) { - GELOGE(FAILED, "Multi LoopCond nodes exist, frame_name: %s.", frame_name.c_str()); - return FAILED; + for (const auto &batch_iter : loop_group_iter.second) { + const std::string &batch_label = batch_iter.first; + for (const auto &enter_node : batch_iter.second->enter_nodes) { + for (const auto &out_node : enter_node->GetOutAllNodes()) { + GELOGI("Find while_group for enter_node %s, frame_name:%s, batch_label:%s.", enter_node->GetName().c_str(), + frame_name.c_str(), batch_label.c_str()); + if ((out_node->GetType() != MERGE) && (out_node->GetType() != REFMERGE)) { + continue; + } + std::string tmp_label; + GE_CHECK_NOTNULL(out_node->GetOpDesc()); + (void)AttrUtils::GetStr(out_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); + bool need_skip = !(batch_label.empty() || tmp_label.empty() || (batch_label == tmp_label)); + if (need_skip) { + continue; + } + + NodePtr next_node = nullptr; + if (FindTargetNode(out_node, NEXTITERATION, true, batch_label, next_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, + "Get NextIteration node failed: inputs of Merge should be Enter/NextIteration, current_Merge=%s", + out_node->GetName().c_str()); + return INTERNAL_ERROR; + } + batch_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node)); + + NodePtr switch_node = nullptr; + if (FindTargetNode(out_node, SWITCH, false, batch_label, switch_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get Switch node failed: output of Merge should be Switch, current_Merge=%s", + out_node->GetName().c_str()); + return INTERNAL_ERROR; + } + if (switch_node == nullptr) { + continue; + } + + NodePtr loop_cond = nullptr; + if (FindTargetNode(switch_node, LOOPCOND, true, batch_label, loop_cond) != SUCCESS) { + GELOGE(INTERNAL_ERROR, + "Get LoopCond node failed: pred input of Switch should be LoopCond, current_Switch=%s", + switch_node->GetName().c_str()); + return INTERNAL_ERROR; + } + if (batch_iter.second->loop_cond == nullptr) {
+ batch_iter.second->loop_cond = loop_cond; + } else if (batch_iter.second->loop_cond != loop_cond) { + GELOGE(FAILED, "Multi LoopCond nodes exist."); + return FAILED; + } } } } @@ -152,17 +223,19 @@ bool NextIterationPass::VerifyWhileGroup() { GELOGE(INTERNAL_ERROR, "Verify while group failed, frame_name is empty."); return false; } - if (loop_group_iter.second->loop_cond == nullptr) { - GELOGE(INTERNAL_ERROR, "Verify while group failed, LoopCond is null, frame_name: %s.", frame_name.c_str()); - return false; - } - - for (const auto &pair_iter : loop_group_iter.second->merge_next_pairs) { - if ((pair_iter.first == nullptr) || (pair_iter.second == nullptr)) { - GELOGE(INTERNAL_ERROR, "Verify while group failed, merge_node/next_node is null, frame_name: %s.", - frame_name.c_str()); + for (const auto &batch_iter : loop_group_iter.second) { + if (batch_iter.second->loop_cond == nullptr) { + GELOGE(INTERNAL_ERROR, "Verify while group failed, LoopCond is null, frame_name: %s.", frame_name.c_str()); return false; } + + for (const auto &pair_iter : batch_iter.second->merge_next_pairs) { + if ((pair_iter.first == nullptr) || (pair_iter.second == nullptr)) { + GELOGE(INTERNAL_ERROR, "Verify while group failed, merge_node/next_node is null, frame_name: %s.", + frame_name.c_str()); + return false; + } + } } } @@ -176,53 +249,56 @@ bool NextIterationPass::VerifyWhileGroup() { /// Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { for (const auto &loop_cond_iter : loop_group_map_) { - const std::string &cond_name = loop_cond_iter.second->loop_cond->GetName(); - GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str()); - - // Create Active node, Enter->Active->Merge, NextIteration->Active->Merge - NodePtr enter_active = CreateActiveNode(graph, cond_name + "_Enter_" + STREAMACTIVE); - NodePtr next_active = CreateActiveNode(graph, cond_name + "_Next_" + STREAMACTIVE); - if ((enter_active == nullptr) || (next_active == nullptr)) { - GELOGE(INTERNAL_ERROR, "Create active node failed, cond_name: %s.", cond_name.c_str()); - return INTERNAL_ERROR; - } - - for (const auto &enter_node : loop_cond_iter.second->enter_nodes) { - // Enter --> Active - if (GraphUtils::AddEdge(enter_node->GetOutControlAnchor(), enter_active->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge from %s to %s failed.", enter_node->GetName().c_str(), - enter_active->GetName().c_str()); + for (const auto &batch_iter : loop_cond_iter.second) { + const std::string &cond_name = batch_iter.second->loop_cond->GetName(); + GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str()); + + // Create Active node, Enter->Active->Merge, NextIteration->Active->Merge + NodePtr enter_active = CreateActiveNode(graph, cond_name + "_Enter_" + STREAMACTIVE); + NodePtr next_active = CreateActiveNode(graph, cond_name + "_Next_" + STREAMACTIVE); + if ((enter_active == nullptr) || (next_active == nullptr)) { + GELOGE(INTERNAL_ERROR, "Create active node failed, cond_name: %s.", cond_name.c_str()); return INTERNAL_ERROR; } - } - for (const auto &pair : loop_cond_iter.second->merge_next_pairs) { - NodePtr merge_node = pair.first; - NodePtr next_node = pair.second; - // Active --> Merge - if (GraphUtils::AddEdge(enter_active->GetOutControlAnchor(), merge_node->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge failed."); - return INTERNAL_ERROR; + for (const auto &enter_node : batch_iter.second->enter_nodes) { + // Enter --> Active + if 
(GraphUtils::AddEdge(enter_node->GetOutControlAnchor(), enter_active->GetInControlAnchor()) != + GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge failed."); + return INTERNAL_ERROR; + } } - // NextIteration --> Active - if (GraphUtils::AddEdge(next_node->GetOutControlAnchor(), next_active->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge failed."); - return INTERNAL_ERROR; + for (const auto &pair : batch_iter.second->merge_next_pairs) { + NodePtr merge_node = pair.first; + NodePtr next_node = pair.second; + // Active --> Merge + if (GraphUtils::AddEdge(enter_active->GetOutControlAnchor(), merge_node->GetInControlAnchor()) != + GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge failed."); + return INTERNAL_ERROR; + } + + // NextIteration --> Active + if (GraphUtils::AddEdge(next_node->GetOutControlAnchor(), next_active->GetInControlAnchor()) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge failed."); + return INTERNAL_ERROR; + } + + // break link between NextIteration and Merge + if (BreakNextIteration(next_node, merge_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Break NextIteration failed"); + return INTERNAL_ERROR; + } } - // break link between NextIteration and Merge - if (BreakNextIteration(next_node, merge_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Break NextIteration failed"); + if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) || + (SetActiveLabelList(next_active, {cond_name}) != SUCCESS)) { + GELOGE(INTERNAL_ERROR, "Set attr ACTIVE_LABEL_LIST failed."); return INTERNAL_ERROR; } } - - if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) || - (SetActiveLabelList(next_active, {cond_name}) != SUCCESS)) { - GELOGE(INTERNAL_ERROR, "Set attr ACTIVE_LABEL_LIST failed."); - return INTERNAL_ERROR; - } } return SUCCESS; @@ -289,11 +365,12 @@ Status NextIterationPass::BreakNextIteration(const NodePtr &next_node, NodePtr & /// @param [in] node /// @param [in] target_type /// @param [in] is_input +/// @param [in] batch_label /// @param [out] target_node /// @return Status /// Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, - NodePtr &target_node) { + const std::string &batch_label, NodePtr &target_node) { if (node == nullptr) { GELOGE(PARAM_INVALID, "node is null."); return PARAM_INVALID; @@ -310,6 +387,12 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string } for (const auto &tmp_node : nodes) { + std::string tmp_label; + (void)AttrUtils::GetStr(tmp_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); + bool need_skip = !(batch_label.empty() || tmp_label.empty() || (batch_label == tmp_label)); + if (need_skip) { + continue; + } const std::string type = tmp_node->GetType(); if ((target_type == LOOPCOND) && (type == target_type)) { target_node = tmp_node; @@ -332,6 +415,7 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string /// @return SUCCESS /// Status NextIterationPass::ClearStatus() { + frame_enter_map_.clear(); loop_group_map_.clear(); return SUCCESS; } diff --git a/ge/graph/passes/next_iteration_pass.h b/ge/graph/passes/next_iteration_pass.h index 3266254d..f8223c20 100755 --- a/ge/graph/passes/next_iteration_pass.h +++ b/ge/graph/passes/next_iteration_pass.h @@ -46,6 +46,13 @@ class NextIterationPass : public GraphPass { /// Status GroupEnterNode(const NodePtr &enter_node); + /// + /// @brief Group Enter nodes without batch_label attr + /// @param [in] compute_graph + /// 
@return Status + /// + Status GroupWithNoBatch(const ComputeGraphPtr &graph); + /// /// @brief Find while groups /// @return Status @@ -90,10 +97,13 @@ class NextIterationPass : public GraphPass { /// @param [out] target_node /// @return Status /// - Status FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, NodePtr &target_node); + Status FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, + const std::string &batch_label, NodePtr &target_node); - // map<frame_name, LoopCondGroupPtr> - std::unordered_map<std::string, LoopCondGroupPtr> loop_group_map_; + // map<frame_name, vector<enter_node>> + std::unordered_map<std::string, std::vector<NodePtr>> frame_enter_map_; + // map<frame_name, map<batch_label, LoopCondGroupPtr>> + std::unordered_map<std::string, std::unordered_map<std::string, LoopCondGroupPtr>> loop_group_map_; }; } // namespace ge #endif // GE_GRAPH_PASSES_NEXT_ITERATION_PASS_H_
diff --git a/ge/graph/passes/remove_same_const_pass.cc b/ge/graph/passes/remove_same_const_pass.cc deleted file mode 100644 index e75a4553..00000000 --- a/ge/graph/passes/remove_same_const_pass.cc +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "remove_same_const_pass.h" - -#include <sstream> -#include <string> -#include <set> - -#include "common/base64.h" -#include "ge_local_engine/engine/host_cpu_engine.h" -#include "graph/utils/node_utils.h" - -namespace ge { -namespace { -std::string GetCseKey(const NodePtr &node) { - std::stringstream ss; - ss << node->GetType() << "control-inputs-"; - std::set<std::string> control_in_node_names; - for (auto &src_node : node->GetInControlNodes()) { - control_in_node_names.insert(src_node->GetName()); - } - for (auto &name : control_in_node_names) { - ss << name << "-"; - } - - ss << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc()); - - return ss.str(); } - -bool IsConstType(const NodePtr &node) { return (node->GetType() == CONSTANT || node->GetType() == CONSTANTOP); } -} // namespace -Status RemoveSameConstPass::Run(ComputeGraphPtr graph) { - GELOGD("Begin to run RemoveSameConstPass on the graph"); - GE_CHECK_NOTNULL(graph); - std::map<std::string, NodePtr> keys_to_node; - for (const auto &node : graph->GetDirectNode()) { - GE_CHECK_NOTNULL(node); - if (!IsConstType(node)) { - continue; - } - bool is_unknown = false; - auto ret = NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown); - if (ret != GRAPH_SUCCESS) { - GELOGW("Get node unknown status failed, node name:%s, type:%s.", - node->GetName().c_str(), node->GetType().c_str()); - continue; - } - if (is_unknown) { - GELOGI("Current node %s, type %s is unknown shape which should be skip.", - node->GetName().c_str(), node->GetType().c_str()); - continue; - } - auto key = GetCseKey(node); - GELOGD("The const node %s cse key %s", node->GetName().c_str(), ge::base64::EncodeToBase64(key).c_str()); - auto iter = keys_to_node.find(key); - if (iter == keys_to_node.end()) { - keys_to_node[key] = node; - continue; - } - - if (node->GetAllOutDataAnchorsSize() != iter->second->GetAllOutDataAnchorsSize()) { - GELOGW("The const node %s and %s have the same CSE key, but different output anchor count, skip to fusion them", - iter->second->GetName().c_str(), node->GetName().c_str()); - continue; - }
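// Illustrative sketch (simplified stand-in, not GE code): the dedup pattern of
// RemoveSameConstPass::Run shown above. A structural key is computed per Const
// node; the first node with a given key is kept and later duplicates are
// redirected to it. Sorting the control-input names via std::set makes the key
// independent of edge order.
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

struct ConstNode {
  std::string type;                         // e.g. "Const"
  std::string attrs;                        // serialized attributes, incl. the value
  std::vector<std::string> control_inputs;  // names of in-control nodes
};

std::string CseKey(const ConstNode &node) {
  std::stringstream ss;
  ss << node.type << "control-inputs-";
  for (const auto &name : std::set<std::string>(node.control_inputs.begin(),
                                                node.control_inputs.end())) {
    ss << name << "-";
  }
  ss << "attrs-" << node.attrs;
  return ss.str();
}

// Usage: keep a std::map<std::string, ConstNode*> keyed by CseKey(); on a key
// hit, relink the duplicate's outputs to the kept node and remove the duplicate.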
- - std::vector<int> output_map(node->GetAllOutDataAnchorsSize()); - for (size_t i = 0; i < node->GetAllOutDataAnchorsSize(); ++i) { - output_map[i] = i; - } - - ret = GraphUtils::ReplaceNodeAnchors(iter->second, node, {}, output_map); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s", node->GetName().c_str(), - iter->second->GetName().c_str(), ret); - return INTERNAL_ERROR; - } - - NodeUtils::UnlinkAll(*node); - - ret = GraphUtils::RemoveNodeWithoutRelink(graph, node); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to remove node %s from graph", node->GetName().c_str()); - return INTERNAL_ERROR; - } - - GELOGI("Remove const node %s by RemoveSameConstPass, replace it with node %s", node->GetName().c_str(), - iter->second->GetName().c_str()); - } - return SUCCESS; -} -} // namespace ge
diff --git a/ge/graph/passes/remove_same_const_pass.h b/ge/graph/passes/remove_same_const_pass.h deleted file mode 100644 index 08905bd2..00000000 --- a/ge/graph/passes/remove_same_const_pass.h +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef GE_GRAPH_PASSES_REMOVE_SAME_CONST_PASS_H_ -#define GE_GRAPH_PASSES_REMOVE_SAME_CONST_PASS_H_ - -#include "graph/types.h" -#include "inc/graph_pass.h" - -namespace ge { -class RemoveSameConstPass : public GraphPass { - public: - Status Run(ge::ComputeGraphPtr graph) override ; -}; -} // namespace ge -#endif //GE_GRAPH_PASSES_REMOVE_SAME_CONST_PASS_H_
diff --git a/ge/graph/passes/switch_to_stream_switch_pass.cc b/ge/graph/passes/switch_to_stream_switch_pass.cc index a7b922e0..f75a104f 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.cc +++ b/ge/graph/passes/switch_to_stream_switch_pass.cc @@ -17,8 +17,13 @@ #include "graph/passes/switch_to_stream_switch_pass.h" #include <stack> #include "common/ge/ge_util.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/types.h" #include "ge/ge_api_types.h" #include "graph/common/omg_util.h" +#include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/utils/type_utils.h" @@ -120,13 +125,12 @@ void SwitchToStreamSwitchPass::MarkCycleDependence( if (visited.count(tmp_node) > 0) { continue; } + GELOGD("MarkCycleDependence: tmp_node=%s.", tmp_node->GetName().c_str()); for (const NodePtr &out_node : tmp_node->GetOutAllNodes()) { if (switch_nodes.find(out_node) == switch_nodes.end()) { out_nodes.push(out_node); continue; } - GELOGD("MarkCycleDependence: tmp_node=%s, switch_node=%s.", - tmp_node->GetName().c_str(), out_node->GetName().c_str()); GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS, GELOGW("set cyclic dependence attr failed."); return ); auto map_iter = switch_cyclic_map_.find(out_node); @@ -598,7 +602,7 @@ Status
SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons /// Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node, const std::set &same_cond_switch) { - GELOGD("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(), + GELOGI("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(), cast_node->GetName().c_str()); std::string orig_switch_name = switch_node->GetName(); OpDescPtr switch_desc = switch_node->GetOpDesc(); @@ -649,7 +653,7 @@ Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_no /// Status SwitchToStreamSwitchPass::ModifySwitchOutCtlEdges(const NodePtr &switch_node, const NodePtr &stream_switch, const NodePtr &active_node) { - GELOGD("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(), + GELOGI("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(), stream_switch->GetName().c_str(), active_node->GetName().c_str()); auto find_res = switch_node_map_.find(switch_node); GE_IF_BOOL_EXEC(find_res == switch_node_map_.end(), { diff --git a/ge/graph/passes/useless_control_out_remove_pass.cc b/ge/graph/passes/useless_control_out_remove_pass.cc deleted file mode 100644 index 4d74d582..00000000 --- a/ge/graph/passes/useless_control_out_remove_pass.cc +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "graph/passes/useless_control_out_remove_pass.h" - -#include "graph/debug/ge_attr_define.h" -#include "graph/utils/graph_utils.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/debug/log.h" - -namespace ge { -Status UselessControlOutRemovePass::Run(NodePtr &node) { - GE_CHECK_NOTNULL(node); - - if ((node->GetType() != CONSTANT) && (node->GetType() != CONSTANTOP)) { - return SUCCESS; - } - GELOGD("UselessControlOutRemovePass running, node: %s.", node->GetName().c_str()); - - // const has no control input - if (node->GetInControlNodes().empty()) { - if (node->GetOutDataNodes().empty()) { - // It is an isolated const, just remove it. 
- GELOGI("Delete isolated const: %s.", node->GetName().c_str()); - GE_CHK_STATUS_RET(IsolateAndDeleteNode(node, {})) - AddNodeDeleted(node); - } else { - auto out_ctrl_anchor = node->GetOutControlAnchor(); - if (out_ctrl_anchor != nullptr && !out_ctrl_anchor->GetPeerAnchors().empty()) { - GELOGI("Node: %s unlink all out control edge.", node->GetName().c_str()); - out_ctrl_anchor->UnlinkAll(); - } - } - } - - return SUCCESS; -} -} // namespace ge \ No newline at end of file diff --git a/ge/graph/passes/useless_control_out_remove_pass.h b/ge/graph/passes/useless_control_out_remove_pass.h deleted file mode 100644 index d84b918f..00000000 --- a/ge/graph/passes/useless_control_out_remove_pass.h +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_GRAPH_PASSES_USELESS_CONTROL_OUT_REMOVE_PASS_H_ -#define GE_GRAPH_PASSES_USELESS_CONTROL_OUT_REMOVE_PASS_H_ - -#include "graph/passes/base_pass.h" - -namespace ge { -class UselessControlOutRemovePass : public BaseNodePass { - public: - Status Run(NodePtr &node) override; -}; -} // namespace ge - -#endif // GE_GRAPH_PASSES_USELESS_CONTROL_OUT_REMOVE_PASS_H_ \ No newline at end of file diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index c8880b2e..9a07c696 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -44,8 +44,6 @@ using std::set; using std::string; using std::vector; -using std::map; -using std::queue; namespace ge { namespace multibatch { @@ -59,15 +57,10 @@ const int kDataInIndex = 0; const int kMergeDataOutIndex = 0; const int kStaticOutput = -1; const int kDivisionConst = 2; -const int32_t kOneInDataNode = 1; -const int32_t kFindNoMatch = 0; inline bool IsDataLikeType(const std::string &node_type) { return (node_type == DATA) || (node_type == AIPP); } -inline bool IsEnterType(const string &node_type) { return (node_type == ENTER) || (node_type == REFENTER); } -const set unchange_types({CONSTANT, CONSTANTOP, ENTER, REFENTER}); - inline bool IsGetNextType(const NodePtr &node) { std::string original_type; GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, @@ -225,6 +218,12 @@ Status MultiBatchGraphCopyer::CopyGraph() { return ret; } + ret = InsertIdentityAfterSwitchN(); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to insert identity nodes after switchn node."); + return INTERNAL_ERROR; + } + GELOGI("Begin to remove useless nodes by prune pass after copy process"); PrunePass prune_pass; ret = prune_pass.Run(graph_); @@ -241,18 +240,6 @@ Status MultiBatchGraphCopyer::Init() { return ret; } - ret = RelinkConstCtrlEdge(); - if (ret != SUCCESS) { - GELOGE(FAILED, "Relink const's control edge failed."); - return FAILED; - } - - ret = ExtractUnchangedStructureOutofCycle(); - if (ret != SUCCESS) { - GELOGE(FAILED, "Extract unchanged structure out of cycle failed."); - return FAILED; - } - for (auto &node 
: graph_->GetAllNodes()) { origin_all_nodes_.emplace_back(node); if (IsDataLikeType(node->GetType())) { @@ -265,281 +252,6 @@ Status MultiBatchGraphCopyer::Init() { return SUCCESS; } -Status MultiBatchGraphCopyer::RelinkConstCtrlEdge() { - for (auto &node : graph_->GetAllNodes()) { - GE_CHECK_NOTNULL(node); - if ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) { - if (node->GetOutDataNodes().empty()) { - continue; - } - if (!node->GetInControlNodes().empty()) { - auto in_ctrl_nodes = node->GetInControlNodes(); - auto out_nodes = node->GetOutAllNodes(); - bool has_merge_out = false; - for (const auto &out_node : out_nodes) { - GE_CHECK_NOTNULL(out_node); - if (out_node->GetType() == MERGE || out_node->GetType() == REFMERGE) { - has_merge_out = true; - break; - } - } - if (has_merge_out) { - continue; - } - auto in_ctrl_anchor = node->GetInControlAnchor(); - GE_CHECK_NOTNULL(in_ctrl_anchor); - in_ctrl_anchor->UnlinkAll(); - for (auto &in_ctrl_node : in_ctrl_nodes) { - auto out_ctrl_anchor_of_in_ctrl_node = in_ctrl_node->GetOutControlAnchor(); - GE_CHECK_NOTNULL(out_ctrl_anchor_of_in_ctrl_node); - for (auto &out_node : out_nodes) { - if (IsEnterType(out_node->GetType())) { - continue; - } - if (!out_ctrl_anchor_of_in_ctrl_node->IsLinkedWith(out_node->GetInControlAnchor())) { - GE_CHK_STATUS_RET(out_ctrl_anchor_of_in_ctrl_node->LinkTo(out_node->GetInControlAnchor())) - } - } - } - } - auto out_ctrl_anchor = node->GetOutControlAnchor(); - if (out_ctrl_anchor != nullptr) { - out_ctrl_anchor->UnlinkAll(); - } - } - } - - return SUCCESS; -} - -Status MultiBatchGraphCopyer::ExtractUnchangedStructureOutofCycle() { - map> frame_enter; - if (GetEnterNodesGroupByFrame(frame_enter) != SUCCESS) { - GELOGE(FAILED, "Get enter nodes grouped by frame_name failed."); - return FAILED; - } - - queue nodes_to_extract; - if (GetNodeNeedExtract(frame_enter, nodes_to_extract) != SUCCESS) { - GELOGE(FAILED, "Get nodes needed to extract failed."); - return FAILED; - } - - while (!nodes_to_extract.empty()) { - auto node = nodes_to_extract.front(); - nodes_to_extract.pop(); - OpDescPtr enter_desc = nullptr; - if (MoveInEntersInDataAnchorDown(node, enter_desc) != SUCCESS) { - GELOGE(FAILED, "Move in enter nodes' in data anchors down of %s failed.", node->GetName().c_str()); - return FAILED; - } - set out_nodes; - if (InsertEnterAfterNode(node, enter_desc, out_nodes) != SUCCESS) { - GELOGE(FAILED, "Insert enter node after %s failed.", node->GetName().c_str()); - return FAILED; - } - - if (MoveCtrlEdgeToOutNodes(node, out_nodes) != SUCCESS) { - GELOGE(FAILED, "Move %s's control edge to out nodes failed.", node->GetName().c_str()); - return FAILED; - } - - for (auto &out_node : out_nodes) { - GE_CHECK_NOTNULL(out_node); - if (AllInDataNodesUnchangeAndNoMergeOut(out_node)) { - nodes_to_extract.push(out_node); - } - } - } - - if (DeleteEnterWithoutDataOut() != SUCCESS) { - GELOGE(FAILED, "Delete enter node without out data nodes failed."); - return FAILED; - } - - return SUCCESS; -} - -Status MultiBatchGraphCopyer::GetEnterNodesGroupByFrame(map> &frame_enter) { - for (auto &node : graph_->GetAllNodes()) { - GE_CHECK_NOTNULL(node); - if (IsEnterType(node->GetType())) { - if (!node->GetInControlNodes().empty() || !node->GetOutControlNodes().empty()) { - continue; - } - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - string frame_name; - if (!AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { - GELOGE(FAILED, "Get attr frame_name of enter[%] failed.", node->GetName().c_str()); - 
return FAILED; - } - frame_enter[frame_name].emplace_back(node); - } - } - - return SUCCESS; -} - -Status MultiBatchGraphCopyer::GetNodeNeedExtract(const map> &frame_enter, - queue &nodes_to_extract) { - for (const auto &one_group : frame_enter) { - auto enters = one_group.second; - for (const auto &enter : enters) { - auto out_data_nodes = enter->GetOutDataNodes(); - for (const auto &out_data_node : out_data_nodes) { - GE_CHECK_NOTNULL(out_data_node); - if (AllInDataNodesUnchangeAndNoMergeOut(out_data_node)) { - nodes_to_extract.push(out_data_node); - } - } - } - } - - return SUCCESS; -} - -bool MultiBatchGraphCopyer::AllInDataNodesUnchangeAndNoMergeOut(const NodePtr &node) { - auto out_data_nodes = node->GetOutDataNodes(); - for (const auto &out_data_node : out_data_nodes) { - if (out_data_node == nullptr) { - return false; - } - - if (out_data_node->GetType() == MERGE || out_data_node->GetType() == REFMERGE) { - return false; - } - } - - auto in_data_nodes = node->GetInDataNodes(); - if (in_data_nodes.size() == kOneInDataNode) { - return true; - } - - for (const auto &in_data_node : in_data_nodes) { - if (in_data_node == nullptr) { - return false; - } - if (unchange_types.count(in_data_node->GetType()) == kFindNoMatch) { - return false; - } - } - - return true; -} - -Status MultiBatchGraphCopyer::MoveInEntersInDataAnchorDown(NodePtr &node, OpDescPtr &enter_desc) { - auto in_data_anchors = node->GetAllInDataAnchors(); - for (auto &in_data_anchor : in_data_anchors) { - auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_data_anchor); - auto peer_in_data_node = peer_out_data_anchor->GetOwnerNode(); - if (IsEnterType(peer_in_data_node->GetType())) { - GE_CHK_STATUS_RET(peer_out_data_anchor->Unlink(in_data_anchor)) - GELOGD("Unlink data edge from %s to %s.", peer_in_data_node->GetName().c_str(), node->GetName().c_str()); - auto enter_in_data_anchors = peer_in_data_node->GetAllInDataAnchors(); - for (auto &enter_in_data_anchor : enter_in_data_anchors) { - auto peer_out_data_anchor_of_enter = enter_in_data_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_data_anchor_of_enter); - if (peer_out_data_anchor_of_enter->IsLinkedWith(in_data_anchor)) { - continue; - } - GE_CHK_STATUS_RET(peer_out_data_anchor_of_enter->LinkTo(in_data_anchor)) - GELOGD("Relink data edge from %s to %s.", peer_out_data_anchor_of_enter->GetOwnerNode()->GetName().c_str(), - node->GetName().c_str()); - } - enter_desc = peer_in_data_node->GetOpDesc(); - GE_CHECK_NOTNULL(enter_desc); - } - } - - return SUCCESS; -} - -Status MultiBatchGraphCopyer::InsertEnterAfterNode(NodePtr &node, const OpDescPtr ©_desc, set &out_nodes) { - if (copy_desc == nullptr) { - return SUCCESS; - } - map>> outanchors_inanchors_nodes; - auto out_data_anchors = node->GetAllOutDataAnchors(); - for (auto &out_data_anchor : out_data_anchors) { - auto peer_in_data_anchors = out_data_anchor->GetPeerInDataAnchors(); - for (auto peer_in_data_anchor : peer_in_data_anchors) { - GE_CHECK_NOTNULL(peer_in_data_anchor); - auto peer_in_data_node = peer_in_data_anchor->GetOwnerNode(); - out_nodes.emplace(peer_in_data_node); - outanchors_inanchors_nodes[out_data_anchor].emplace_back(std::make_pair(peer_in_data_anchor, peer_in_data_node)); - } - } - - int32_t i = 0; - auto node_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(node_desc); - // Insert one enter node after node's per out data anchor - for (auto &outanchor_inanchors_nodes : outanchors_inanchors_nodes) { - string name = node->GetName() + "_" + ENTER + "_" + 
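// Illustrative sketch (simplified stand-in, not GE code): the splice pattern in
// the deleted loop below, which InsertIdentityAfterSwitchN re-uses later in
// this patch. All consumers of one producer output are moved behind a newly
// created pass-through node, so the producer keeps a single outgoing edge and
// the original fan-out hangs off the inserted node.
#include <memory>
#include <string>
#include <vector>

struct SimpleNode {
  std::string name;
  std::vector<SimpleNode *> consumers;  // fan-out of the single output
};

SimpleNode *SpliceAfter(SimpleNode &producer, const std::string &suffix,
                        std::vector<std::unique_ptr<SimpleNode>> &owner) {
  owner.push_back(std::unique_ptr<SimpleNode>(new SimpleNode));
  SimpleNode *inserted = owner.back().get();
  inserted->name = producer.name + suffix;       // e.g. name + "_Enter_0"
  inserted->consumers.swap(producer.consumers);  // fan-out moves behind the new node
  producer.consumers.push_back(inserted);        // producer now feeds only the new node
  return inserted;
}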
- for (auto &outanchor_inanchors_nodes : outanchors_inanchors_nodes) { - string name = node->GetName() + "_" + ENTER + "_" + std::to_string(i++); - GELOGD("Create Enter op %s after %s.", name.c_str(), node->GetName().c_str()); - auto enter_desc = AttrUtils::CopyOpDesc(copy_desc); - enter_desc->SetName(name); - GE_CHK_STATUS_RET( - enter_desc->UpdateInputDesc("x", node_desc->GetOutputDesc(outanchor_inanchors_nodes.first->GetIdx()))) - GE_CHK_STATUS_RET( - enter_desc->UpdateOutputDesc("y", node_desc->GetOutputDesc(outanchor_inanchors_nodes.first->GetIdx()))) - auto enter_node = graph_->AddNode(enter_desc); - GE_CHECK_NOTNULL(enter_node); - GE_CHK_STATUS_RET(outanchor_inanchors_nodes.first->LinkTo(enter_node->GetInDataAnchor(kDataInIndex))) - GE_CHECK_NOTNULL(enter_node->GetOutDataAnchor(kDataInIndex)); - for (auto &inanchor_node : outanchor_inanchors_nodes.second) { - GE_CHK_STATUS_RET(outanchor_inanchors_nodes.first->Unlink(inanchor_node.first)) - GE_CHK_STATUS_RET(enter_node->GetOutDataAnchor(kDataInIndex)->LinkTo(inanchor_node.first)) - GELOGD("Unlink from %s to %s, link from %s to %s then to %s.", node->GetName().c_str(), - inanchor_node.second->GetName().c_str(), node->GetName().c_str(), enter_node->GetName().c_str(), - inanchor_node.second->GetName().c_str()); - } - } - - return SUCCESS; -} - -// Move node's in control edges to out data nodes -Status MultiBatchGraphCopyer::MoveCtrlEdgeToOutNodes(NodePtr &node, set<NodePtr> &out_nodes) { - auto in_ctrl_anchor = node->GetInControlAnchor(); - GE_CHECK_NOTNULL(in_ctrl_anchor); - auto peer_out_ctrl_anchors = in_ctrl_anchor->GetPeerOutControlAnchors(); - for (auto &peer_out_ctrl_anchor : peer_out_ctrl_anchors) { - GE_CHK_STATUS_RET(peer_out_ctrl_anchor->Unlink(in_ctrl_anchor)) - GELOGD("Unlink control edge from %s to %s.", peer_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), - node->GetName().c_str()); - for (auto &out_node : out_nodes) { - auto in_ctrl_anchor_of_out_node = out_node->GetInControlAnchor(); - GE_CHECK_NOTNULL(in_ctrl_anchor_of_out_node); - if (!peer_out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor_of_out_node)) { - GE_CHK_STATUS_RET(peer_out_ctrl_anchor->LinkTo(in_ctrl_anchor_of_out_node)) - GELOGD("Link control edge from %s to %s.", peer_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), - out_node->GetName().c_str()); - } - } - } - - return SUCCESS; -} - -Status MultiBatchGraphCopyer::DeleteEnterWithoutDataOut() { - for (auto &node : graph_->GetAllNodes()) { - GE_CHECK_NOTNULL(node); - if (IsEnterType(node->GetType())) { - auto out_nodes = node->GetOutAllNodes(); - if (out_nodes.empty()) { - GELOGD("Delete enter node: %s which has no output.", node->GetName().c_str()); - GE_CHK_STATUS_RET(GraphUtils::IsolateNode(node, {})) - GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph_, node)) - } - } - } - - return SUCCESS; -} - void MultiBatchGraphCopyer::LabelStatusForData(const NodePtr &data) { auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); GELOGI("Label status for %s, shape_dims is %s.", data->GetName().c_str(), @@ -585,9 +297,6 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { LabelStatusForGetNextSink(data); } } - - map<string, vector<NodePtr>> frame_enters; - InitStatus(frame_enters); bool changed = true; // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch while (changed) { changed = false; for (auto &node : origin_all_nodes_) { auto iter = origin_nodes_status_.find(node.get()); if (iter != origin_nodes_status_.end()) { continue; }
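// Illustrative sketch (simplified stand-in, not GE code): the fixed-point loop
// in the hunk below. A node is "in a batch branch" as soon as any of its
// inputs is; the scan repeats until no new node gets labelled, which
// terminates because the label set only ever grows.
#include <set>
#include <vector>

struct GraphNode {
  std::vector<const GraphNode *> inputs;
};

void PropagateInBatchBranch(const std::vector<GraphNode> &nodes,
                            std::set<const GraphNode *> &in_branch) {
  bool changed = true;
  while (changed) {
    changed = false;
    for (const auto &node : nodes) {
      if (in_branch.count(&node) > 0) {
        continue;  // already labelled
      }
      for (const auto *in : node.inputs) {
        if (in_branch.count(in) > 0) {
          in_branch.insert(&node);
          changed = true;
          break;
        }
      }
    }
  }
}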
- for (auto &in_node : node->GetInDataNodes()) { - if (origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end()) { - if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end()) { - origin_nodes_status_[node.get()] == kNodeInBatchBranch; - ResetEnterStatus(frame_enters, node); - changed = true; - } + for (auto &in_node : node->GetInAllNodes()) { + bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && + origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; + if (is_in_batch) { + origin_nodes_status_[node.get()] = kNodeInBatchBranch; + changed = true; break; } } @@ -612,45 +320,6 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { return SUCCESS; } -void MultiBatchGraphCopyer::InitStatus(map<string, vector<NodePtr>> &frame_enters) { - for (const auto &node : origin_all_nodes_) { - if (!IsEnterType(node->GetType())) { - continue; - } - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - continue; - } - string frame_name; - if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { - frame_enters[frame_name].emplace_back(node); - } - } - - for (const auto &data : origin_data_nodes_) { - auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); - if (!IsAllDimsPositive(data_shape.GetDims())) { - origin_nodes_status_[data.get()] = kNodeInBatchBranch; - } - } -} - -void MultiBatchGraphCopyer::ResetEnterStatus(map<string, vector<NodePtr>> &frame_enters, const NodePtr &node) { - if (!IsEnterType(node->GetType())) { - return; - } - - for (const auto &frame_enter : frame_enters) { - auto &enters = frame_enter.second; - if (std::find(enters.begin(), enters.end(), node) != enters.end()) { - for (const auto &enter : enters) { - origin_nodes_status_[enter.get()] = kNodeInBatchBranch; - } - break; - } - } -} - Status MultiBatchGraphCopyer::LabelStatus() { if (LabelInBatchBranchStatus() != SUCCESS) { GELOGE(PARAM_INVALID, "Failed to label no in batch branch"); @@ -1691,6 +1360,52 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) { return SUCCESS; } +Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { + for (auto &node : graph_->GetAllNodes()) { + if (node->GetType() != SWITCHN) { + continue; + } + auto switchn_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(switchn_desc); + size_t i = 0; + for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + for (auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { + auto out_node = in_data_anchor->GetOwnerNode(); + auto op_desc = out_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if ((out_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { + GELOGD("No need to insert identity between %s and %s.", node->GetName().c_str(), out_node->GetName().c_str()); + continue; + } + + auto identity_desc = MakeShared<OpDesc>(node->GetName() + "_identity_" + std::to_string(i), IDENTITY); + GE_CHECK_NOTNULL(identity_desc); + + string batch_label; + if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { + if (!AttrUtils::SetStr(identity_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { + GELOGE(FAILED, "Set attr ATTR_NAME_BATCH_LABEL failed, node:%s.", identity_desc->GetName().c_str()); + return FAILED; + } + } + + auto data_desc = switchn_desc->GetOutputDesc(i); + i++; + GE_CHK_STATUS_RET(identity_desc->AddInputDesc("x", data_desc)); + GE_CHK_STATUS_RET(identity_desc->AddOutputDesc("y", data_desc)); + + auto identity_node = graph_->AddNode(identity_desc); + GE_CHECK_NOTNULL(identity_node); + GE_CHK_STATUS_RET(out_data_anchor->LinkTo(identity_node->GetInDataAnchor(0))); + GE_CHECK_NOTNULL(identity_node->GetOutControlAnchor()); + GE_CHK_STATUS_RET(identity_node->GetOutControlAnchor()->LinkTo(out_node->GetInControlAnchor())); + } + } + } + + return SUCCESS; +} + Status ProcessMultiBatch(ComputeGraphPtr &graph) { if (GetLocalOmgContext().dynamic_node_type.empty()) { const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); @@ -1700,6 +1415,7 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) { return pass_manager.Run(graph); } } + if (!GetLocalOmgContext().need_multi_batch) { GELOGI("No need to process_multi for no_train graph."); return SUCCESS;
diff --git a/ge/graph/preprocess/multi_batch_copy_graph.h b/ge/graph/preprocess/multi_batch_copy_graph.h index d51c4c02..a0de4413 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/ge/graph/preprocess/multi_batch_copy_graph.h @@ -18,7 +18,6 @@ #include <map> #include <set> #include <vector> -#include <queue> #include "external/ge/ge_api_error_codes.h" @@ -65,26 +64,12 @@ class MultiBatchGraphCopyer { private: Status Init(); Status CheckArguments(); - Status RelinkConstCtrlEdge(); - - Status ExtractUnchangedStructureOutofCycle(); - Status GetEnterNodesGroupByFrame(std::map<std::string, std::vector<NodePtr>> &frame_enter); - Status GetNodeNeedExtract(const std::map<std::string, std::vector<NodePtr>> &frame_enter, - std::queue<NodePtr> &nodes_to_extract); - bool AllInDataNodesUnchangeAndNoMergeOut(const NodePtr &node); - Status MoveInEntersInDataAnchorDown(NodePtr &node, OpDescPtr &enter_desc); - Status InsertEnterAfterNode(NodePtr &node, const OpDescPtr &enter_desc, std::set<NodePtr> &out_nodes); - Status MoveCtrlEdgeToOutNodes(NodePtr &node, std::set<NodePtr> &out_nodes); - Status DeleteEnterWithoutDataOut(); // label status for origin_all_nodes_ Status LabelStatus(); Status LabelInBatchBranchStatus(); void LabelStatusForData(const NodePtr &data); void LabelStatusForGetNextSink(const NodePtr &data); - void InitStatus(std::map<std::string, std::vector<NodePtr>> &frame_enters); - void ResetEnterStatus(std::map<std::string, std::vector<NodePtr>> &frame_enters, const NodePtr &node); - // add nodes functions Status CreateNewNodes(); @@ -96,6 +81,7 @@ class MultiBatchGraphCopyer { Status InsertSwitchNForData(const NodePtr &node, const size_t &out_anchor_index, const size_t &peer_in_anchor_index, std::vector<std::pair<Node *, NodePtr>> &dynamic_out_to_switchn); + Status InsertIdentityAfterSwitchN(); Status UpdateMaxShapeToData(const NodePtr &node, size_t out_anchor_index); Status UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index);
diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index f1c25290..1fe40c77 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -22,7 +22,6 @@ #include "common/blocking_queue.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" -#include "graph/ge_local_context.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/common/tensor_value.h" #include "hybrid/executor/hybrid_profiler.h" @@ -39,7 +38,6 @@ struct GraphExecutionContext { uint64_t session_id = 0; const HybridModel *model = nullptr; - const GEThreadLocalContext *ge_context = nullptr; rtStream_t stream = nullptr; rtContext_t rt_context = nullptr; rtContext_t rt_gen_context = nullptr;
diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index e17998db..8ba687c2 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -95,7 +95,6 @@ Status HybridModelExecutor::InitExecutionContext() { context_.stream = stream_; context_.model = model_; context_.session_id = ::ge::GetContext().SessionId(); - context_.ge_context = &GetThreadLocalContext(); GELOGD("session id from model = %lu, from context = %lu",
model_->GetSessionId(), context_.session_id); context_.allocator = NpuMemoryAllocator::GetAllocator(device_id_); GE_CHECK_NOTNULL(context_.allocator);
diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index ceed40b0..033c5304 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ -18,7 +18,6 @@ #include <chrono> #include "framework/common/debug/log.h" #include "graph/compute_graph.h" -#include "graph/utils/tensor_utils.h" #include "hybrid_execution_context.h" #include "subgraph_context.h" @@ -36,31 +35,29 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item( this->num_pending_shapes_); } -Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) { +Status ShapeInferenceState::UpdateInputShape(int idx, + const GeShape &ori_shape, + const GeShape &shape) { if (node_item.IsInputShapeStatic(idx)) { GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]", node_item.NodeName().c_str(), idx, node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(), - target.GetShape().ToString().c_str()); + shape.ToString().c_str()); return SUCCESS; } - int64_t tensor_size = -1; - (void) TensorUtils::GetSize(target, tensor_size); - GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld", + GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]", node_item.NodeName().c_str(), idx, - target.GetShape().ToString().c_str(), - target.GetOriginShape().ToString().c_str(), - tensor_size); + shape.ToString().c_str(), + ori_shape.ToString().c_str()); std::lock_guard<std::mutex> lk(mu_); auto tensor_desc = node_item.MutableInputDesc(idx); GE_CHECK_NOTNULL(tensor_desc); - tensor_desc->SetShape(target.GetShape()); - tensor_desc->SetOriginShape(target.GetOriginShape()); - (void) TensorUtils::SetSize(*tensor_desc, tensor_size); + tensor_desc->SetShape(shape); + tensor_desc->SetOriginShape(ori_shape); if (--num_pending_shapes_ == 0) { ready_cv_.notify_all(); } @@ -113,24 +110,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex for (auto &p : shape_futures) { auto idx = p.first; auto &future = p.second; + GeShape shape; + GeShape ori_shape; RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx); - auto src_tensor_desc = future.GetTensorDesc(); - GE_CHECK_NOTNULL(src_tensor_desc);
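// Illustrative sketch (simplified stand-in, not GE code): the await-and-copy
// step in the hunk below. A ShapeFuture-like handle blocks until the producer
// has finished shape inference, then hands back the origin shape and the
// runtime shape, which are written into the consumer's input desc.
#include <condition_variable>
#include <mutex>
#include <string>

struct ShapeResult {
  std::string ori_shape;  // stand-in for GeShape
  std::string shape;
};

class SimpleShapeFuture {
 public:
  void Set(const ShapeResult &r) {
    {
      std::lock_guard<std::mutex> lk(mu_);
      result_ = r;
      ready_ = true;
    }
    cv_.notify_all();
  }
  ShapeResult Get() {  // blocks, like ShapeFuture::Get(ori_shape, shape)
    std::unique_lock<std::mutex> lk(mu_);
    cv_.wait(lk, [this] { return ready_; });
    return result_;
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  bool ready_ = false;
  ShapeResult result_;
};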
index = %u", + node_item.NodeName().c_str(), + idx); RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); - auto input_desc = node_item.MutableInputDesc(idx); - GE_CHECK_NOTNULL(input_desc); - int64_t tensor_size = -1; - (void) TensorUtils::GetSize(*src_tensor_desc, tensor_size); - GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu", + GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]", node_item.NodeName().c_str(), idx, - src_tensor_desc->GetShape().ToString().c_str(), - src_tensor_desc->GetOriginShape().ToString().c_str(), - tensor_size); - input_desc->SetShape(src_tensor_desc->GetShape()); - input_desc->SetOriginShape(src_tensor_desc->GetOriginShape()); - (void) TensorUtils::SetSize(*input_desc, tensor_size); + shape.ToString().c_str(), + ori_shape.ToString().c_str()); + auto input_desc = node_item.MutableInputDesc(idx); + GE_CHECK_NOTNULL(input_desc); + input_desc->SetShape(std::move(shape)); + input_desc->SetOriginShape(ori_shape); } return SUCCESS; @@ -193,14 +190,5 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str()); return SUCCESS; } - -GeTensorDescPtr ShapeFuture::GetTensorDesc() { - GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); - if (!subgraph_context_->Await(src_node_)) { - GELOGE(INTERNAL_ERROR, "cancelled"); - return nullptr; - } - return src_node_->GetOpDesc()->MutableOutputDesc(src_index_); -} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 312e177f..04f1ee4b 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -35,7 +35,6 @@ class ShapeFuture { ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context); ~ShapeFuture() = default; Status Get(GeShape &ori_shape, GeShape &shape); - GeTensorDescPtr GetTensorDesc(); private: NodePtr src_node_; @@ -46,7 +45,7 @@ class ShapeFuture { struct ShapeInferenceState { explicit ShapeInferenceState(const NodeItem &node_item); - Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc); + Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape); void UpdateInputShapeFuture(int idx, ShapeFuture &&future); diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 4b6dddab..5a464f8e 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vectorGetOrCreateNodeState(input_node); GE_CHECK_NOTNULL(node_state); - node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc); + node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape()); } } @@ -268,6 +268,13 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta } else { node_state.SetKernelTask(node_item.kernel_task); } + + GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str()); + RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); + GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node), + "[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str()); + RECORD_COMPILE_EVENT(ctx, 
node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); + GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 21dd8e4b..b984eec3 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -20,9 +20,12 @@ #include "graph/utils/tensor_adapter.h" #include "graph/debug/ge_attr_define.h" #include "hybrid/node_executor/node_executor.h" -#include "hybrid/executor//worker//shape_inference_engine.h" +#include "common/dump/dump_manager.h" #include "common/dump/dump_op.h" +#include "common/types.h" +#include "common/ge_types.h" #include "common/profiling/profiling_manager.h" +#include "runtime/base.h" namespace ge { namespace hybrid { @@ -151,19 +154,18 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(model); - // only report aicpu and aicore node - bool is_profiling_report = context_->GetNodeItem().is_profiling_report; - if (!is_profiling_report) { - GELOGD("Node[%s] is not aicore or aicpu, and no need to report data.", node->GetName().c_str()); - return SUCCESS; - } - GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); auto op_desc = node->GetOpDesc(); std::string op_name = op_desc->GetName(); std::string dynamic_model_name = model->GetModelName(); - uint32_t task_id = context_->GetTaskId(); - uint32_t stream_id = context_->GetStreamId(); + + uint32_t task_id = 0; + uint32_t stream_id = 0; + if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { + GELOGE(PARAM_INVALID, "Get task_id and stream_id failed."); + return PARAM_INVALID; + } + TaskDescInfo tmp_task_desc_info; tmp_task_desc_info.model_name = dynamic_model_name; tmp_task_desc_info.op_name = op_name; @@ -175,8 +177,6 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel * } tmp_task_desc_info.task_id = task_id; tmp_task_desc_info.stream_id = stream_id; - tmp_task_desc_info.shape_type = "dynamic"; - tmp_task_desc_info.cur_iter_num = graph_context_->iteration; GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]", node->GetName().c_str(), task_id, stream_id); task_desc_info.emplace_back(tmp_task_desc_info); @@ -348,10 +348,6 @@ Status NodeDoneCallback::OnNodeDone() { } GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item)); - if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) { - // update output tensor sizes - GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item)); - } // PropagateOutputs for type == DEPEND_COMPUTE if (node_item.shape_inference_type == DEPEND_COMPUTE) { if (graph_context_->trace_enabled) { diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 66d0ede2..1d813526 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -17,15 +17,9 @@ #include "hybrid/executor/worker/shape_inference_engine.h" #include "graph/shape_refiner.h" #include "graph/utils/node_utils.h" -#include "graph/utils/tensor_utils.h" -#include "graph/utils/type_utils.h" -#include "common/math/math_util.h" #include "hybrid/node_executor/node_executor.h" namespace ge { -namespace { -const int kAlignment = 32; -} namespace hybrid { 
ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) : execution_context_(execution_context), @@ -46,9 +40,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { } if (node_item.fused_subgraph != nullptr) { - GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph)); - GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item)); - return SUCCESS; + return InferShapeForSubgraph(node_item, *node_item.fused_subgraph); } // Skip shape inference for node of type DEPEND_COMPUTE @@ -71,15 +63,21 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { std::lock_guard<std::mutex> lk(mu_); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), - "Invoke InferShapeAndType failed."); + "Invoke InferShapeAndType failed."); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); } + // Check again to make sure shape is valid after shape inference + if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) { + bool is_unknown_shape = false; + GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape), + "Failed to get shape status. node = %s", + node_item.NodeName().c_str()); - // update output tensor sizes after shape inference - // error if shape is still unknown and not of type DEPEND_SHAPE_RANGE - RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); - GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE)); - RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); + GE_CHK_BOOL_RET_STATUS(!is_unknown_shape, + INTERNAL_ERROR, + "[%s] Shape is still unknown after shape inference.", + node_item.NodeName().c_str()); + } GELOGD("[%s] [HybridTrace] After shape inference. Node = %s", node_item.NodeName().c_str(), @@ -129,6 +127,8 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { // propagate each output for (int i = 0; i < node_item.num_outputs; ++i) { auto output_desc = node_item.op_desc->MutableOutputDesc(i); + const auto &shape = output_desc->MutableShape(); + const auto &ori_shape = output_desc->GetOriginShape(); auto &output_nodes = node_item.outputs[i]; // propagate output to all sub-inputs @@ -149,7 +149,9 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first, std::move(future)); } else { - GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc)); + GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, + ori_shape, + shape)); } } } @@ -228,92 +230,5 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) { } return SUCCESS; } - -Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc, - std::vector<int64_t> &shape, - bool fallback_with_range) { - const auto &tensor_shape = tensor_desc.MutableShape(); - if (tensor_shape.IsUnknownShape()) { - if (!fallback_with_range) { - GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. 
shape = [%s]", - tensor_shape.ToString().c_str()); - return INTERNAL_ERROR; - } - - GELOGD("Calc output size by range"); - std::vector> shape_range; - GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range"); - if (shape_range.size() != shape.size()) { - GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)", - shape_range.size(), - shape.size()); - return INTERNAL_ERROR; - } - - for (size_t dim_index = 0; dim_index < shape.size(); ++dim_index) { - if (shape[dim_index] == ge::UNKNOWN_DIM) { - shape[dim_index] = shape_range[dim_index].second; - } - } - - GELOGD("After canonicalization, shape = [%s], before = [%s]", - GeShape(shape).ToString().c_str(), - tensor_shape.ToString().c_str()); - } - - return SUCCESS; -} - -Status ShapeInferenceEngine::CalcTensorSize(DataType data_type, - const std::vector &shape, - int64_t &tensor_size) { - GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str()); - uint32_t type_size; - if (!TypeUtils::GetDataTypeLength(data_type, type_size)) { - GELOGE(INTERNAL_ERROR, "Failed to get data type size"); - return INTERNAL_ERROR; - } - - tensor_size = type_size; - for (const auto &dim : shape) { - GE_CHECK_GE(dim, 0); - GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim), - "Shape size overflow, shape = [%s]", - GeShape(shape).ToString().c_str()); - tensor_size *= dim; - } - - GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1), - "Tensor size is too large: %ld, shape = [%s]", - tensor_size, - GeShape(shape).ToString().c_str()); - tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment; - return SUCCESS; -} - -Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) { - auto op_desc = node_item.GetOpDesc(); - for (size_t output_index = 0; output_index < op_desc->GetOutputsSize(); ++output_index) { - auto tensor_desc = op_desc->MutableOutputDesc(output_index); - GE_CHECK_NOTNULL(tensor_desc); - const auto &shape = tensor_desc->MutableShape(); - // modify on copy - auto dims = shape.GetDims(); - GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range), - "[%s] Failed to canonicalize shape for output %zu", - node_item.NodeName().c_str(), - output_index); - - int64_t tensor_size; - GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size), - "[%s] Failed to calc tensor size for output %zu", - node_item.NodeName().c_str(), - output_index); - GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size); - (void) TensorUtils::SetSize(*tensor_desc, tensor_size); - } - - return SUCCESS; -} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/worker/shape_inference_engine.h b/ge/hybrid/executor/worker/shape_inference_engine.h index b946577f..7bb9269c 100644 --- a/ge/hybrid/executor/worker/shape_inference_engine.h +++ b/ge/hybrid/executor/worker/shape_inference_engine.h @@ -34,11 +34,7 @@ class ShapeInferenceEngine { Status PropagateOutputShapes(const NodeItem &node_item); - static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false); - private: - static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector &shape, bool fallback_with_range); - static Status CalcTensorSize(DataType data_type, const std::vector &shape, int64_t &tensor_size); static Status UpdatePeerNodeShape(const Node &node); Status AwaitDependentNodes(NodeState &node_state); diff --git 
a/ge/hybrid/executor/worker/task_compile_engine.cc b/ge/hybrid/executor/worker/task_compile_engine.cc index f80374c6..e2e94f66 100755 --- a/ge/hybrid/executor/worker/task_compile_engine.cc +++ b/ge/hybrid/executor/worker/task_compile_engine.cc @@ -26,9 +26,6 @@ Status TaskCompileEngine::Compile(NodeState &node_state, GraphExecutionContext * RECORD_COMPILE_EVENT(context, node_item.NodeName().c_str(), "[Compile] Start"); GE_CHK_RT_RET(rtCtxSetCurrent(context->rt_gen_context)); - if (context->ge_context != nullptr) { - GetThreadLocalContext() = *context->ge_context; - } shared_ptr<NodeTask> kernel_task; auto ret = node_item.node_executor->CompileTask(*context->model, node_item.node, kernel_task); RECORD_COMPILE_EVENT(context, node_state.GetName().c_str(), "[Compile] End"); diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index cba83dbd..d519c35b 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -226,10 +226,7 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n new_node->node_id = node_index; new_node->op_desc->SetId(node_index); node_index += 1; - NodeExecutorManager::ExecutorType executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node); - new_node->is_profiling_report = (executor_type == NodeExecutorManager::ExecutorType::AICORE) || - (executor_type == NodeExecutorManager::ExecutorType::AICPU_TF) || - (executor_type == NodeExecutorManager::ExecutorType::AICPU_CUSTOM); + *node_item = new_node.get(); node_items[node] = std::move(new_node); return SUCCESS; diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc index eb00f509..69cf334d 100644 --- a/ge/hybrid/model/node_item.cc +++ b/ge/hybrid/model/node_item.cc @@ -22,7 +22,6 @@ #include "graph/debug/ge_attr_define.h" #include "graph/utils/node_utils.h" #include "hybrid/node_executor/node_executor.h" -#include "hybrid/executor/worker/shape_inference_engine.h" namespace ge { namespace hybrid { @@ -48,7 +47,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr GE_CHECK_NOTNULL(dst_op_desc); auto in_idx = node_and_anchor.second->GetIdx(); auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx); - fused_subgraph.input_mapping[static_cast<uint32_t>(parent_index)].emplace_back(tensor_desc); + fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc); GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx); } @@ -65,7 +64,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap return FAILED; } - fused_subgraph.output_mapping.emplace(static_cast<uint32_t>(parent_index), op_desc); + fused_subgraph.output_mapping.emplace(parent_index, op_desc); return SUCCESS; } @@ -127,7 +126,12 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite return SUCCESS; } -void NodeItem::ResolveOptionalInputs() { +Status NodeItem::Init() { + GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX); + GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX); + num_inputs = static_cast<int>(op_desc->GetInputsSize()); + num_outputs = static_cast<int>(op_desc->GetOutputsSize()); + if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) { has_optional_inputs = true; for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { @@ -139,18 +143,7 @@ void NodeItem::ResolveOptionalInputs() { } } } -} -Status NodeItem::InitInputsAndOutputs() { - GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX); - GE_CHECK_LE(op_desc->GetOutputsSize(), 
INT32_MAX); - num_inputs = static_cast<int>(op_desc->GetInputsSize()); - num_outputs = static_cast<int>(op_desc->GetOutputsSize()); - ResolveOptionalInputs(); - return SUCCESS; -} - -Status NodeItem::ResolveDynamicState() { (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); if (!is_dynamic) { @@ -158,54 +151,38 @@ Status NodeItem::ResolveDynamicState() { "[%s] Failed to get shape status.", node->GetName().c_str()); } - return SUCCESS; -} -Status NodeItem::ResolveStaticInputsAndOutputs() { - for (int i = 0; i < num_inputs; ++i) { - const auto &input_desc = MutableInputDesc(i); - GE_CHECK_NOTNULL(input_desc); - if (input_desc->MutableShape().IsUnknownShape()) { - is_input_shape_static_.push_back(false); - } else { - num_static_input_shapes++; - is_input_shape_static_.push_back(true); - GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", - NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); + if (is_dynamic) { + for (int i = 0; i < num_inputs; ++i) { + const auto &input_desc = MutableInputDesc(i); + GE_CHECK_NOTNULL(input_desc); + if (input_desc->MutableShape().IsUnknownShape()) { + is_input_shape_static_.push_back(false); + } else { + num_static_input_shapes++; + is_input_shape_static_.push_back(true); + GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", + NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); + } } - } - for (int i = 0; i < num_outputs; ++i) { - const auto &output_desc = op_desc->MutableOutputDesc(i); - GE_CHECK_NOTNULL(output_desc); - if (output_desc->MutableShape().IsUnknownShape()) { - is_output_shape_static = false; - break; + for (int i = 0; i < num_outputs; ++i) { + const auto &output_desc = op_desc->MutableOutputDesc(i); + GE_CHECK_NOTNULL(output_desc); + if (output_desc->MutableShape().IsUnknownShape()) { + is_output_shape_static = false; + break; + } } - } - - if (is_output_shape_static) { - GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this)); - } - return SUCCESS; -} -void NodeItem::ResolveUnknownShapeType() { - if (IsControlOp() || node_type == PARTITIONEDCALL) { - shape_inference_type = DEPEND_COMPUTE; - } else { - int32_t unknown_shape_type_val = 0; - (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); - shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); - } -} + if (IsControlOp() || node_type == PARTITIONEDCALL) { + shape_inference_type = DEPEND_COMPUTE; + } else { + int32_t unknown_shape_type_val = 0; + (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); + shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); + } -Status NodeItem::Init() { - GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs()); - GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState()); - if (is_dynamic) { - ResolveUnknownShapeType(); - GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs()); GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str()); } diff --git a/ge/hybrid/model/node_item.h b/ge/hybrid/model/node_item.h index a34227c5..8fbdc648 100644 --- a/ge/hybrid/model/node_item.h +++ b/ge/hybrid/model/node_item.h @@ -99,16 +99,10 @@ struct NodeItem { std::map<int, int> reuse_inputs; std::map<int, int> reuse_outputs; int num_static_input_shapes = 0; - bool is_profiling_report = false; private: explicit NodeItem(NodePtr node); Status Init(); - Status InitInputsAndOutputs(); - void 
ResolveOptionalInputs(); - Status ResolveDynamicState(); - Status ResolveStaticInputsAndOutputs(); - void ResolveUnknownShapeType(); std::vector<bool> is_input_shape_static_; std::vector<uint32_t> input_desc_indices_; diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index d2cfbece..407210cf 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -165,16 +165,6 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function } RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); - uint32_t task_id = 0; - uint32_t stream_id = 0; - rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "Get task_id and stream_id failed."); - return rt_ret; - } - context.SetTaskId(task_id); - context.SetStreamId(stream_id); - GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); } diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 43f4f6d2..7330f616 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -189,17 +189,6 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function &callback_fun) const { - if (callback_fun == nullptr) { - GELOGW("[%s] Callback is NULL", GetNodeName()); - return SUCCESS; - } auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun); if (ret != SUCCESS) { GELOGE(ret, "[%s] Failed to register callback", GetNodeName()); @@ -319,22 +315,6 @@ void TaskContext::SetStatus(Status status) { } } -uint32_t TaskContext::GetTaskId() const { - return task_id_; -} - -void TaskContext::SetTaskId(uint32_t task_id) { - task_id_ = task_id; -} - -uint32_t TaskContext::GetStreamId() const { - return stream_id_; -} - -void TaskContext::SetStreamId(uint32_t stream_id) { - stream_id_ = stream_id; -} - Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) { GE_CHECK_NOTNULL(buffer); if (ori_addr == nullptr) { @@ -404,20 +384,6 @@ const char *TaskContext::GetNodeName() const { return node_item_->NodeName().c_str(); } -void TaskContext::ReleaseInputsAndOutputs() { - for (int i = 0; i < node_item_->num_inputs; ++i) { - auto tensor = inputs_start_ + i; - tensor->Destroy(); - GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), i); - } - - for (int i = 0; i < node_item_->num_outputs; ++i) { - auto tensor = outputs_start_ + i; - tensor->Destroy(); - GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), i); - } -} - void TaskContext::ReleaseInput(int index) { auto input_tensor = MutableInput(index); if (input_tensor != nullptr) { @@ -490,9 +456,5 @@ Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) con const DumpProperties &TaskContext::GetDumpProperties() const { return execution_context_->dump_properties; } - -bool TaskContext::NeedCallback() { - return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0; -} } // namespace hybrid } // namespace ge 
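With the cached task_id_/stream_id_ members and their accessors removed from TaskContext (see the task_context.h hunk below), code that still needs those IDs is expected to query the runtime directly, as the execution_engine.cc hunk above now does. A minimal sketch of that pattern, assuming runtime/base.h is included for rtGetTaskIdAndStreamID and RT_ERROR_NONE:

    // Ask the runtime for the IDs of the most recently launched task,
    // instead of reading values previously cached on TaskContext.
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) {
      GELOGE(PARAM_INVALID, "Get task_id and stream_id failed.");
      return PARAM_INVALID;
    }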
diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 0e85a8e3..0549a1dc 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -50,8 +50,6 @@ class TaskContext { ConstGeTensorDescPtr GetOutputDesc(int index) const; GeTensorDescPtr MutableInputDesc(int index) const; GeTensorDescPtr MutableOutputDesc(int index) const; - void ReleaseInputsAndOutputs(); - bool NeedCallback(); void ReleaseInput(int index); const TensorValue *GetInput(int index) const; const TensorValue *GetOutput(int index) const; @@ -96,12 +94,6 @@ class TaskContext { void SetStatus(Status status); - uint32_t GetTaskId() const; - void SetTaskId(uint32_t task_id); - - uint32_t GetStreamId() const; - void SetStreamId(uint32_t stream_id); - bool IsForceInferShape() const; void SetForceInferShape(bool force_infer_shape); void *handle_ = nullptr; @@ -123,8 +115,6 @@ class TaskContext { Status status_ = SUCCESS; std::vector<void *> workspaces_; uint64_t iteration_ = 0; - uint32_t task_id_= 0; - uint32_t stream_id_ = 0; }; } // namespace hybrid } // namespace ge diff --git a/ge/ir_build/atc_ir_common.cc b/ge/ir_build/atc_ir_common.cc index 77d749de..7d63514c 100755 --- a/ge/ir_build/atc_ir_common.cc +++ b/ge/ir_build/atc_ir_common.cc @@ -63,19 +63,18 @@ vector<string> SplitInputShape(const std::string &input_shape) { } } // namespace -Status CheckInputFormat(const string &input_format) { +Status CheckInputFormat(const std::string &input_format) { if (input_format.empty()) { return ge::SUCCESS; } if (!ge::TypeUtils::IsFormatValid(input_format.c_str())) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format is invalid!"}); - GELOGE(ge::PARAM_INVALID, "input format [%s] is invalid!", input_format.c_str()); + "E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format not found"}); + GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str()); return ge::PARAM_INVALID; } return ge::SUCCESS; } - bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, std::string &dynamic_batch_size) { int32_t size = 0; diff --git a/ge/ir_build/atc_ir_common.h b/ge/ir_build/atc_ir_common.h index b26c2f2b..1e4af73e 100644 --- a/ge/ir_build/atc_ir_common.h +++ b/ge/ir_build/atc_ir_common.h @@ -75,7 +75,7 @@ Status CheckInsertOpConfParamValid(const std::string insert_op_conf); Status CheckDisableReuseMemoryParamValid(const std::string disable_reuse_memory); Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream); Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode); -Status CheckInputFormat(const string &input_format); +Status CheckInputFormat(const std::string &input_format); void PrintOptionMap(std::map<std::string, std::string> &options, std::string tips); void EraseEndSemicolon(std::string &param); } diff --git a/ge/offline/main.cc b/ge/offline/main.cc index 46d25d97..b7188a85 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -305,7 +305,7 @@ class GFlagUtils { " --debug_dir Set the save path of operator compilation intermediate files.\n" "Default value: ./kernel_meta\n" " --op_compiler_cache_dir Set the save path of operator compilation cache files.\n" - "Default value: $HOME/atc_data\n" + "Default value: $HOME/atc_data/kernel_cache\n" " --op_compiler_cache_mode Set the operator compilation cache mode."
"Options are disable(default), enable and force(force to refresh the cache)"); diff --git a/ge/proto/op_mapping_info.proto b/ge/proto/op_mapping_info.proto index 7fb6f84b..e23b7ebe 100644 --- a/ge/proto/op_mapping_info.proto +++ b/ge/proto/op_mapping_info.proto @@ -15,7 +15,6 @@ message Output { int32 original_output_data_type = 7; int32 original_output_format = 8; uint64 size = 9; - Shape origin_shape = 10; } message Input { @@ -24,7 +23,6 @@ message Input { Shape shape = 3; uint64 address = 4; uint64 size = 5; - Shape origin_shape = 6; } enum BufferType { diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 1f3fc5c5..a2652b67 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -32,16 +32,14 @@ namespace ge { namespace { const size_t kDataMemAlignSize = 32; const size_t kDataMemAlignUnit = 2; -const string kShapeTypeDynamic = "dynamic"; -const string kShapeTypeStatic = "static"; size_t GetAlignedSize(size_t size) { size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; return aligned_size; } -Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { - if (!ProfilingManager::Instance().ProfilingModelLoadOn()) { +Status ProfilingTaskInfo(OpTask *op_task) { + if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) { return SUCCESS; } @@ -68,8 +66,6 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { tmp_task_desc_info.block_dim = block_dim; tmp_task_desc_info.task_id = task_id; tmp_task_desc_info.stream_id = stream_id; - tmp_task_desc_info.shape_type = shape_type; - tmp_task_desc_info.cur_iter_num = 0; GELOGD("GetTaskDescInfo of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id); task_desc_info.emplace_back(tmp_task_desc_info); @@ -197,7 +193,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c if (ret != SUCCESS) { return ret; } - GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task, kShapeTypeStatic)); + GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task)); } return ret; @@ -259,7 +255,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, std::lock_guard lk(*stream_mutex_); GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); - GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); + GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get())); return SUCCESS; } } // namespace ge diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index 51c3e845..20b1354d 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -119,11 +119,11 @@ Status OpTask::DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_works uintptr_t *arg_base = nullptr; size_t arg_num = 0; GetIoAddr(arg_base, arg_num); - if (arg_num < all_addresses.size()) { - GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu", + if (arg_num != all_addresses.size()) { + GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect = %zu, but got = %zu", op_desc_->GetName().c_str(), - all_addresses.size(), - arg_num); + arg_num, + all_addresses.size()); return INTERNAL_ERROR; } diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 9b361b96..cce17f93 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -293,7 +293,6 @@ const std::string MDL_BANK_PATH_FLAG = "ge.mdl_bank_path"; // Configure op bank path const std::string 
OP_BANK_PATH_FLAG = "ge.op_bank_path"; -const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update"; // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; @@ -367,7 +366,6 @@ static const char *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR; static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; static const char *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); -static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); // for interface: aclgrphBuildModel @@ -391,13 +389,22 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, OP_COMPILER_CACHE_DIR, OP_COMPILER_CACHE_MODE, MDL_BANK_PATH, - OP_BANK_PATH, - OP_BANK_UPDATE}; + OP_BANK_PATH}; // for interface: aclgrphParse -const std::set ir_parser_suppported_options = { - INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, - OUT_NODES, COMPRESS_WEIGHT_CONF, ENABLE_SCOPE_FUSION_PASSES}; +const std::set ir_parser_suppported_options = {INPUT_FORMAT, + INPUT_SHAPE, + OP_NAME_MAP, + IS_DYNAMIC_INPUT, + INPUT_FP16_NODES, + IS_INPUT_ADJUST_HW_LAYOUT, + IS_OUTPUT_ADJUST_HW_LAYOUT, + OUTPUT, + OUTPUT_TYPE, + OUT_NODES, + COMPRESS_WEIGHT_CONF, + ENABLE_SCOPE_FUSION_PASSES, + LOG_LEVEL}; // for interface: aclgrphBuildInitialize const std::set global_options = {CORE_TYPE, diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 4267aec4..254dbada 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -37,9 +37,7 @@ enum FrameworkType { MINDSPORE = 1, TENSORFLOW = 3, ANDROID_NN, -#ifndef ONLY_COMPILE_OPEN_SRC ONNX, -#endif FRAMEWORK_RESERVED, }; @@ -248,8 +246,6 @@ struct TaskDescInfo { uint32_t block_dim; uint32_t task_id; uint32_t stream_id; - std::string shape_type; - int64_t cur_iter_num; }; // Profiling info of graph diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 1b78860d..5a73126f 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -30,6 +30,8 @@ #include "runtime/base.h" namespace ge { +class ModelListenerAdapter; + class SingleOp; class DynamicSingleOp; @@ -53,8 +55,14 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status Initialize(); ge::Status Finalize(); + // Load model + ge::Status LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, int32_t priority, + std::shared_ptr listener); + ge::Status UnloadModel(uint32_t modelId); + ge::Status RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data); + // Get input and output descriptor ge::Status GetModelDescInfo(uint32_t model_id, std::vector &input_desc, std::vector &output_desc, bool new_model_desc = false); @@ -160,6 +168,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, std::vector &output_desc); + ge::Status LoadModel(uint32_t &model_id, const ge::ModelData &model_data, + std::shared_ptr listener); + ge::Status CommandHandle(const ge::Command &command); ge::Status SetDump(const DumpConfig &dump_config); @@ -286,6 +297,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { private: static bool isInit_; }; + +ge::Status ModelInfoParser(const ge::ModelData &model, ge::ModelInfo &model_info); } // namespace ge 
#endif // INC_FRAMEWORK_EXECUTOR_GE_EXECUTOR_H_ diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 57cff9a7..240bd839 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -36,7 +36,7 @@ using Status = domi::Status; namespace domi { using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>( - const google::protobuf::Message *root_proto, const std::string &graph)>; + const google::protobuf::Message *root_proto, const std::string &graph)>; class ModelParser { public: ModelParser() {} @@ -44,20 +44,19 @@ class ModelParser { virtual ~ModelParser() {} /** - * @ingroup domi_omg - * @brief Analyze network model data - * @param [in] file Network model file path - * @param [in|out] graph Save the network information after analysis - * @return SUCCESS - * @return Others failed - */ + * @ingroup domi_omg + * @brief Analyze network model data + * @param [in] file Network model file path + * @param [in|out] graph Save the network information after analysis + * @return SUCCESS + * @return Others failed + */ virtual Status Parse(const char *file, ge::Graph &graph) = 0; /** * @ingroup domi_omg * @brief Parse relevant data from memory and save it to graph * @param [in] input Model file memory data - * @param [in] input Model file memory size * @param [in|out] graph A graph for saving the model information after analysis * @return SUCCESS * @return FAILED */ virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; -#ifndef ONLY_COMPILE_OPEN_SRC /** * @ingroup domi_omg * @brief Parse relevant data from memory and save it to graph * @param [in] input Model file memory data * @param [in|out] graph A graph for saving the model information after analysis * @return SUCCESS * @return FAILED * @author */ virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; -#endif /** - * @ingroup domi_omg - * @brief Analyze network model data - * @param [in] proto network model - * @param [in|out] graph Save the network information after analysis - * @return SUCCESS - * @return Others failed - */ + * @ingroup domi_omg + * @brief Analyze network model data + * @param [in] proto network model + * @param [in|out] graph Save the network information after analysis + * @return SUCCESS + * @return Others failed + */ virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; /** - * @ingroup domi_omg - * @brief Analyze callback model data in subgraph - * @param [in] proto network model - * @param [in] callback callback of subgraph - * @param [in|out] graph Save the network information after analysis - * @return SUCCESS - * @return Others failed - */ - virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, + * @ingroup domi_omg + * @brief Analyze callback model data in subgraph + * @param [in] proto network model + * @param [in] callback callback of subgraph + * @param [in|out] graph Save the network information after analysis + * @return SUCCESS + * @return Others failed + */ + virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, + GetGraphCallback callback, ge::ComputeGraphPtr &graph) = 0; /** - * @ingroup domi_omg - * @brief Convert model files to JSON format - * @param [in] model_file Model file path to be converted - * @param [out] json_file Converted JSON file path - * @return SUCCESS - * @return Others failed - */ + * @ingroup domi_omg + * @brief Convert model files to JSON format + * @param [in] model_file Model file path to be converted + * 
@param [out] json_file Converted JSON file path + * @return SUCCESS + * @return Others failed + */ virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } /* diff --git a/metadef b/metadef index 8c89c521..5a1b0ab9 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 8c89c521f5d682327b2f975cf06f7093960eb2f0 +Subproject commit 5a1b0ab95e2d205ee9ee578ac4bcde4f4fbed6d8 diff --git a/parser b/parser index 54ec7731..77dc42c3 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 54ec7731e3a2951191693e02ff3165220975ed0c +Subproject commit 77dc42c383e416ed4a0f606ddc3c02cdaa082ac3 diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 9b45e7e2..75eefdd1 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -384,8 +384,3 @@ rtError_t rtModelExit(rtModel_t model, rtStream_t stream) { return RT_ERROR_NONE; } - -rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) -{ - return RT_ERROR_NONE; -} diff --git a/tests/ut/common/graph/CMakeLists.txt b/tests/ut/common/graph/CMakeLists.txt index 99b21182..2f8776e3 100644 --- a/tests/ut/common/graph/CMakeLists.txt +++ b/tests/ut/common/graph/CMakeLists.txt @@ -61,67 +61,58 @@ set(UT_FILES ) set(SRC_FILES - "${GE_CODE_DIR}/metadef/graph/option/ge_local_context.cc" - "${GE_CODE_DIR}/metadef/graph/option/ge_context.cc" - "${GE_CODE_DIR}/metadef/graph/anchor.cc" - "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" - "${GE_CODE_DIR}/metadef/graph/attr_value.cc" - "${GE_CODE_DIR}/metadef/graph/buffer.cc" - "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" - "${GE_CODE_DIR}/metadef/graph/ge_attr_define.cc" - "${GE_CODE_DIR}/metadef/graph/graph.cc" - "${GE_CODE_DIR}/metadef/graph/gnode.cc" - "${GE_CODE_DIR}/metadef/graph/ascend_string.cc" - "${GE_CODE_DIR}/metadef/graph/model.cc" - "${GE_CODE_DIR}/metadef/graph/model_serialize.cc" - "${GE_CODE_DIR}/metadef/graph/node.cc" - "${GE_CODE_DIR}/metadef/graph/op_desc.cc" - "${GE_CODE_DIR}/metadef/graph/operator.cc" - "${GE_CODE_DIR}/metadef/graph/operator_factory.cc" - "${GE_CODE_DIR}/metadef/graph/operator_factory_impl.cc" - "${GE_CODE_DIR}/metadef/graph/tensor.cc" - "${GE_CODE_DIR}/metadef/graph/ge_tensor.cc" - "${GE_CODE_DIR}/metadef/graph/shape_refiner.cc" - "${GE_CODE_DIR}/metadef/graph/format_refiner.cc" - "${GE_CODE_DIR}/metadef/graph/inference_context.cc" - "${GE_CODE_DIR}/metadef/graph/detail/attributes_holder.cc" - "${GE_CODE_DIR}/metadef/graph/utils/anchor_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/graph_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/node_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/op_desc_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/type_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/ge_ir_utils.cc" - "${GE_CODE_DIR}/metadef/graph/utils/tensor_utils.cc" + #"${GE_CODE_DIR}/metadef/graph/option/ge_local_context.cc" + #"${GE_CODE_DIR}/metadef/graph/option/ge_context.cc" + #"${GE_CODE_DIR}/metadef/graph/anchor.cc" + #"${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" + #"${GE_CODE_DIR}/metadef/graph/attr_value.cc" + #"${GE_CODE_DIR}/metadef/graph/buffer.cc" + #"${GE_CODE_DIR}/metadef/graph/compute_graph.cc" + #"${GE_CODE_DIR}/metadef/graph/ge_attr_define.cc" + #"${GE_CODE_DIR}/metadef/graph/graph.cc" + #"${GE_CODE_DIR}/metadef/graph/gnode.cc" + #"${GE_CODE_DIR}/metadef/graph/ascend_string.cc" + #"${GE_CODE_DIR}/metadef/graph/model.cc" + #"${GE_CODE_DIR}/metadef/graph/model_serialize.cc" + 
#"${GE_CODE_DIR}/metadef/graph/node.cc" + #"${GE_CODE_DIR}/metadef/graph/op_desc.cc" + #"${GE_CODE_DIR}/metadef/graph/operator.cc" + #"${GE_CODE_DIR}/metadef/graph/operator_reg.cc" + #"${GE_CODE_DIR}/metadef/graph/operator_factory.cc" + #"${GE_CODE_DIR}/metadef/graph/operator_factory_impl.cc" + #"${GE_CODE_DIR}/metadef/graph/range_vistor.cc" + #"${GE_CODE_DIR}/metadef/graph/tensor.cc" + #"${GE_CODE_DIR}/metadef/graph/ge_tensor.cc" + #"${GE_CODE_DIR}/metadef/graph/shape_refiner.cc" + #"${GE_CODE_DIR}/metadef/graph/format_refiner.cc" + #"${GE_CODE_DIR}/metadef/graph/inference_context.cc" + #"${GE_CODE_DIR}/metadef/graph/detail/attributes_holder.cc" + #"${GE_CODE_DIR}/metadef/graph/utils/anchor_utils.cc" + #"${GE_CODE_DIR}/metadef/graph/utils/graph_utils.cc" + #"${GE_CODE_DIR}/metadef/graph/utils/node_utils.cc" + #"${GE_CODE_DIR}/metadef/graph/utils/op_desc_utils.cc" + #"${GE_CODE_DIR}/metadef/graph/utils/type_utils.cc" + #"${GE_CODE_DIR}/metadef/graph/utils/ge_ir_utils.cc" + #"${GE_CODE_DIR}/metadef/graph/utils/tensor_utils.cc" "${GE_CODE_DIR}/metadef/ops/op_imp.cpp" - "${GE_CODE_DIR}/metadef/graph/opsproto/opsproto_manager.cc" - "${GE_CODE_DIR}/metadef/graph/utils/transformer_utils.cc" - "${GE_CODE_DIR}/metadef/graph/runtime_inference_context.cc" - "${GE_CODE_DIR}/metadef/graph/ref_relation.cc" - "${GE_CODE_DIR}/metadef/third_party/transformer/src/transfer_shape_according_to_format.cpp" - "${GE_CODE_DIR}/metadef/third_party/transformer/src/axis_util.cpp" + #"${GE_CODE_DIR}/metadef/graph/opsproto/opsproto_manager.cc" ) #add_executable(ut_libgraph ${UT_FILES} ${SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) add_executable(ut_libgraph ${UT_FILES} ${SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) -target_compile_options(ut_libgraph PRIVATE - -g --coverage -fprofile-arcs -ftest-coverage -) - target_compile_definitions(ut_libgraph PRIVATE google=ascend_private ) target_link_libraries(ut_libgraph $ + graph gtest gtest_main slog_stub ascend_protobuf c_sec - error_manager_stub - mmpa_stub -lrt -ldl - -lgcov ) diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 8eec3df6..1dfd8bbc 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -245,8 +245,6 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/hccl_group_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/memcpy_addr_async_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" "${GE_CODE_DIR}/ge/graph/load/new_model_manager/model_utils.cc" @@ -477,8 +475,6 @@ set(GRAPH_PASS_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/reshape_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/resource_pair_add_control_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/resource_pair_remove_control_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transop_breadth_fusion_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transop_without_reshape_fusion_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transop_depth_fusion_pass.cc" @@ -487,7 +483,7 @@ set(GRAPH_PASS_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/compile_nodes_pass.cc" "${GE_CODE_DIR}/ge/graph/common/transop_util.cc" "${GE_CODE_DIR}/ge/graph/passes/flow_ctrl_pass.cc" - #"${GE_CODE_DIR}/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc" + 
"${GE_CODE_DIR}/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/folding_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/variable_op_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transpose_transdata_pass.cc" @@ -675,13 +671,13 @@ set(MULTI_PARTS_TEST_FILES ) set(SINGLE_OP_TEST_FILES - #"single_op/single_op_model_unittest.cc" + "single_op/single_op_model_unittest.cc" "single_op/single_op_manager_unittest.cc" "single_op/stream_resource_unittest.cc" ) set(PROFILING_MNG_TEST_FILES - #"profiling/ge_profiling_manager_unittest.cc" + "profiling/ge_profiling_manager_unittest.cc" ) set(OTHERS_TEST_FILES @@ -848,17 +844,13 @@ add_executable(ut_libge_multiparts_utest ${MULTI_PARTS_TEST_FILES} ) -target_compile_options(ut_libge_multiparts_utest PRIVATE - -g --coverage -fprofile-arcs -ftest-coverage -) - target_compile_definitions(ut_libge_multiparts_utest PRIVATE google=ascend_private ) target_link_libraries(ut_libge_multiparts_utest $ - ge_build_common ge_load_common ge_execute_common ge_optimize_common ge_partition_common ge_prepare_common ge_single_op ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov + ge_build_common ge_load_common ge_execute_common ge_optimize_common ge_partition_common ge_prepare_common ge_single_op ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl ) # libge_others_utest @@ -869,14 +861,9 @@ add_executable(ut_libge_others_utest ${EXECUTE_TEST_FILES} ${OTHERS_TEST_FILES} ) - -target_compile_options(ut_libge_others_utest PRIVATE - -g --coverage -fprofile-arcs -ftest-coverage -) - target_link_libraries(ut_libge_others_utest $ - ge_load_common ge_execute_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov + ge_load_common ge_execute_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl ) # libge_kernel_utest @@ -886,14 +873,9 @@ add_executable(ut_libge_kernel_utest ${KERNEL_TEST_FILES} ${KERNEL_SRC_FILES} ) - -target_compile_options(ut_libge_kernel_utest PRIVATE - -g --coverage -fprofile-arcs -ftest-coverage -) - target_link_libraries(ut_libge_kernel_utest $ - ge_load_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov + ge_load_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl ) # libge_distinct_load_utest @@ -905,10 +887,6 @@ add_executable(ut_libge_distinct_load_utest ${PROFILING_MNG_TEST_FILES} ) -target_compile_options(ut_libge_distinct_load_utest PRIVATE - -g --coverage -fprofile-arcs -ftest-coverage -) - target_compile_definitions(ut_libge_distinct_load_utest PRIVATE google=ascend_private ) @@ -919,5 +897,5 @@ target_link_libraries(ut_libge_distinct_load_utest ge_execute_common ge_ut_common_format ge_load_common ge_single_op ge_prepare_common ge_optimize_common ge_build_common ge_partition_common ge_ut_common - gtest gtest_main ascend_protobuf json c_sec -lrt -ldl -lpthread -lgcov + gtest gtest_main ascend_protobuf json c_sec -lrt -ldl -lpthread ) diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc index f53a0732..1035d00d 100644 --- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc +++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc @@ -147,7 +147,6 @@ class UtestMemoryAssignerTest : public testing::Test { void TearDown() { GetContext().out_nodes_map.clear(); } }; -/* TEST_F(UtestMemoryAssignerTest, 
MemoryBlock_Resize_RealSizeList_is_empty) { ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); @@ -161,7 +160,6 @@ TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) { delete memory_block; } -*/ namespace ge { diff --git a/tests/ut/ge/graph/passes/folding_kernel/broadcast_args_kernel_unittest.cc b/tests/ut/ge/graph/passes/folding_kernel/broadcast_args_kernel_unittest.cc index 3a9f758b..7990a117 100644 --- a/tests/ut/ge/graph/passes/folding_kernel/broadcast_args_kernel_unittest.cc +++ b/tests/ut/ge/graph/passes/folding_kernel/broadcast_args_kernel_unittest.cc @@ -52,6 +52,7 @@ using namespace testing; using namespace ge; +using namespace cce; using namespace ge::test; #define TEST_OPERATOR(op_, input_shapes, output_shapes) \ diff --git a/tests/ut/ge/graph/passes/folding_kernel/broadcast_gradient_args_kernel_unittest.cc b/tests/ut/ge/graph/passes/folding_kernel/broadcast_gradient_args_kernel_unittest.cc index bb021589..e8d15291 100644 --- a/tests/ut/ge/graph/passes/folding_kernel/broadcast_gradient_args_kernel_unittest.cc +++ b/tests/ut/ge/graph/passes/folding_kernel/broadcast_gradient_args_kernel_unittest.cc @@ -52,6 +52,7 @@ using namespace testing; using namespace ge; +using namespace cce; class UtestBroadcastGradientArgsKernel : public testing::Test { protected: diff --git a/tests/ut/ge/graph/passes/folding_kernel/empty_kernel_unittest.cc b/tests/ut/ge/graph/passes/folding_kernel/empty_kernel_unittest.cc index 65faad20..7705f986 100644 --- a/tests/ut/ge/graph/passes/folding_kernel/empty_kernel_unittest.cc +++ b/tests/ut/ge/graph/passes/folding_kernel/empty_kernel_unittest.cc @@ -53,6 +53,7 @@ using namespace testing; using namespace ge; +using namespace cce; using namespace ge::test; class UtestEmptyKernel : public testing::Test { diff --git a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc index b51908e2..8058279f 100644 --- a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc @@ -38,7 +38,6 @@ #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/graph_var_manager.h" #include "graph_builder_utils.h" -#include "cce/dnn.h" #include "cce/dnn_struct_base.hpp" #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" diff --git a/tests/ut/ge/graph_ir/ge_operator_factory_unittest.cc b/tests/ut/ge/graph_ir/ge_operator_factory_unittest.cc index 97be491a..64f76515 100644 --- a/tests/ut/ge/graph_ir/ge_operator_factory_unittest.cc +++ b/tests/ut/ge/graph_ir/ge_operator_factory_unittest.cc @@ -84,7 +84,7 @@ TEST(UtestGeOperatorFactory, register_func) { status = OperatorFactoryImpl::RegisterVerifyFunc("ABC", nullptr); EXPECT_EQ(GRAPH_SUCCESS, status); } -/* + TEST(UtestGeOperatorFactory, get_ops_type_list_fail) { auto operator_creators_temp = OperatorFactoryImpl::operator_creators_; OperatorFactoryImpl::operator_creators_ = nullptr; @@ -92,5 +92,4 @@ TEST(UtestGeOperatorFactory, get_ops_type_list_fail) { graphStatus status = OperatorFactoryImpl::GetOpsTypeList(all_ops); EXPECT_EQ(GRAPH_FAILED, status); OperatorFactoryImpl::operator_creators_ = operator_creators_temp; -} -*/ +} \ No newline at end of file diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index b6b97d89..7543b212 100644 --- 
a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -17,7 +17,7 @@ #include #include -//#include "cce/taskdown_common.hpp" +#include "cce/taskdown_common.hpp" #include "graph/load/new_model_manager/model_utils.h" #include "graph/utils/graph_utils.h" #include "runtime/rt.h" diff --git a/tests/ut/ge/single_op/stream_resource_unittest.cc b/tests/ut/ge/single_op/stream_resource_unittest.cc index b7306815..8cc137dc 100644 --- a/tests/ut/ge/single_op/stream_resource_unittest.cc +++ b/tests/ut/ge/single_op/stream_resource_unittest.cc @@ -58,7 +58,6 @@ TEST_F(UtestStreamResource, test_malloc_memory) { ASSERT_NE(res.MallocMemory(purpose, 100), nullptr); } -/* TEST_F(UtestStreamResource, test_do_malloc_memory) { size_t max_allocated = 0; vector allocated; @@ -84,4 +83,3 @@ TEST_F(UtestStreamResource, test_do_malloc_memory) { rtFree(res); } } -*/ diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index e491d43f..972f470c 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -33,15 +33,6 @@ extern "C" { -/** - * @brief Get the rank number in the group. - * - * @param group A string identifying the group name. - * @param rankSize A pointer identifying the rank number. - * @return HcclResult - */ -HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); - /** * @brief Get the rank number in the group. * @@ -51,15 +42,6 @@ HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); */ HcclResult HcomGetRankSize(const char *group, u32 *rankSize); -/** - * @brief Get the rank number of this rank's server within the group. - * - * @param group A string identifying the group name. - * @param localRankSize A pointer identifying the rank number. - * @return HcclResult - */ -HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); - /** * @brief Get the rank number of this rank's server within the group. * @@ -69,15 +51,6 @@ HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); */ HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); -/** - * @brief Get the rank id of this rank. - * - * @param group A string identifying the group name. - * @param rankId A pointer identifying the rank id. - * @return HcclResult - */ -HcclResult hcom_get_rank_id(const char *group, u32 *rankId); - /** * @brief Get the rank id of this rank. * @@ -87,15 +60,6 @@ HcclResult hcom_get_rank_id(const char *group, u32 *rankId); */ HcclResult HcomGetRankId(const char *group, u32 *rankId); -/** - * @brief Get the local rank id of this rank's server within the group. - * - * @param group A string identifying the group name. - * @param localRankId A pointer identifying the local rank id. - * @return HcclResult - */ -HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); - /** * @brief Get the local rank id of this rank's server within the group. * @@ -105,16 +69,6 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); */ HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); -/** - * @brief Get the world rank id according to the group rank id. - * - * @param group A string identifying the group name. - * @param groupRank An integer(u32) identifying the group rank id. - * @param worldRank A pointer identifying the world rank id. 
- * @return HcclResult - */ -HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); - /** * @brief Get the world rank id according to the group rank id. * @@ -125,16 +79,6 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, */ HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); -/** - * @brief Get the group rank id according to the world rank id. - * - * @param worldRank An integer(u32) identifying the world rank id. - * @param group A string identifying the group name. - * @param groupRank A pointer identifying the group rank id. - * @return HcclResult - */ -HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); - /** * @brief Get the group rank id according to the world rank id. * @@ -145,16 +89,6 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, */ HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); -/** - * @brief Create group. - * - * @param group A string identifying the group name. - * @param rankNum An integer(u32) identifying the number of ranks in the group. - * @param rankIds A list identifying the ranks in the group. - * @return HcclResult - */ -HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); - /** * @brief Create group. * @@ -165,14 +99,6 @@ HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); */ HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); -/** - * @brief Destroy group - * - * @param group A string identifying the group name. - * @return HcclResult - */ -HcclResult hcom_destroy_group(const char *group); - /** * @brief Destroy group * @@ -189,46 +115,54 @@ HcclResult HcomDestroyGroup(const char *group); * @param IdxList A list identifying the index of end gradient in each segment. * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); /** - * @brief Set the gradient split strategy with in the group, according to gradient index. + * @brief Set the gradient split strategy within the group, according to gradient data size. * * @param group A string identifying the group name. * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param IdxList A list identifying the index of end gradient in each segment. + * @param sizeList A list identifying the percent of each segment. * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); /** - * @brief Set the gradient split strategy with in the group, according to gradient data size. + * @brief Initialize hcom executor. 
* - * @param group A string identifying the group name. - * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param sizeList A list identifying the percent of each segment. + * @param void * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +HcclResult HcomExecInitialize(); /** - * @brief Set the gradient split strategy with in the group, according to gradient data size. + * @brief Finalize hcom executor. * - * @param group A string identifying the group name. - * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param sizeList A list identifying the percent of each segment. + * @param void * @return HcclResult */ -extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); +HcclResult HcomExecFinalize(); /** - * @brief Register memories and init resources for remote access. + * @brief Put collective communication operation into hcom executor. * - * @param addrList memory addresses for remote access. - * @param count number of remote memory addresses. + * @param opInfo information about collective communication operation. + * @param callback callback after collective communication operation. * @return HcclResult */ -extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); +HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback); + +/** + * @brief Put remote access operation into hcom executor. + * + * @param remoteAccessType operation type (read or write). + * @param addrInfos address information about collective communication operation. + * @param callback callback after collective communication operation. + * @return HcclResult + */ +HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, + const std::vector<HcomRemoteAccessAddrInfo>& addrInfos, + std::function<void(HcclResult status)> callback); /** * @brief Register memories and init resources for remote access. @@ -239,16 +173,6 @@ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrLis */ extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); -HcclResult HcomExecInitialize(); - -HcclResult HcomExecFinalize(); - -HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback); - -HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, - const std::vector<HcomRemoteAccessAddrInfo>& addrInfos, - std::function<void(HcclResult status)> callback); - #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index ad48f70b..66638bbb 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -50,7 +50,7 @@ typedef int (*mmFilter)(const mmDirent *entry); typedef int (*mmFilter2)(const mmDirent2 *entry); typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b); -typedef size_t mmSize_t; +typedef size_t mmSize_t; //lint !e410 !e1051 typedef off_t mmOfft_t; typedef pid_t mmPid_t; typedef long MM_LONG; diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index bce58f32..7c4f7be2 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -18,7 +18,9 @@ #define D_SYSLOG_H_ #ifdef __cplusplus +#ifndef LOG_CPP extern "C" { +#endif #endif // __cplusplus #ifndef LINUX @@ -105,6 +107,7 @@ extern "C" { #define SECURITY_LOG_MASK (0x00100000) #define RUN_LOG_MASK (0x01000000) #define OPERATION_LOG_MASK (0x10000000) +#define RESERVERD_LENGTH 52 typedef struct tagDCODE { const char *cName; @@ -116,6 +119,18 @@ typedef struct tagKV { char *value; } KeyValue; +typedef enum { + APPLICATION = 0, + SYSTEM +} ProcessType; + +typedef struct { + ProcessType type; + unsigned int pid; + unsigned int deviceId; + char reserved[RESERVERD_LENGTH]; +} LogAttr; + /** * @ingroup slog * * @brief ... @@ -228,6 
+243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); */ DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); +/** + * @ingroup slog + * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION + * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttr(LogAttr logAttr); + /** * @ingroup slog * @brief dlog_error: print error log @@ -367,6 +390,121 @@ void DlogInner(int moduleId, int level, const char *fmt, ...); void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); #ifdef __cplusplus +#ifndef LOG_CPP } +#endif // LOG_CPP #endif // __cplusplus + +#ifdef LOG_CPP +#ifdef __cplusplus +extern "C" { +#endif +/** + * @ingroup slog + * @brief DlogGetlevelForC: get module loglevel and enableEvent + * + * @param [in]moduleId: module id(see slog.h, eg: CCE), others: invalid + * @param [out]enableEvent: 1: enable; 0: disable + * @return: module level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + */ +DLL_EXPORT int DlogGetlevelForC(int moduleId, int *enableEvent); + +/** + * @ingroup slog + * @brief DlogSetlevelForC: set module loglevel and enableEvent + * + * @param [in]moduleId: module id(see slog.h, eg: CCE), -1: all modules, others: invalid + * @param [in]level: log level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + * @param [in]enableEvent: 1: enable; 0: disable, others:invalid + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetlevelForC(int moduleId, int level, int enableEvent); + +/** + * @ingroup slog + * @brief CheckLogLevelForC: check module level enable or not + * users need not call it because all dlog interfaces (including inner interfaces) already call it + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG + * @return: 1:enable, 0:disable + */ +DLL_EXPORT int CheckLogLevelForC(int moduleId, int logLevel); + +/** + * @ingroup slog + * @brief DlogSetAttrForC: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION + * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); + +/** + * @ingroup slog + * @brief DlogForC: print log, need caller to specify level + * call CheckLogLevelForC in advance to optimize performance; calling the interface with fmt input takes time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]fmt: log content + */ +#define DlogForC(moduleId, level, fmt, ...) \ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogSubForC: print log, need caller to specify level and submodule + * call CheckLogLevelForC in advance to optimize performance; calling the interface with fmt input takes time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]submodule: eg: engine + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]fmt: log content + */ +#define DlogSubForC(moduleId, submodule, level, fmt, ...) 
\ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogWithKVForC: print log, need caller to specify level and other parameters + * call CheckLogLevelForC in advance to optimize performance; calling the interface with fmt input takes time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]pstKVArray: key-value array + * @param [in]kvNum: key-value element num in array + * @param [in]fmt: log content + */ +#define DlogWithKVForC(moduleId, level, pstKVArray, kvNum, fmt, ...) \ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogFlushForC: flush log buffer to file + */ +DLL_EXPORT void DlogFlushForC(void); + +/** + * @ingroup slog + * @brief Internal log interface, other modules are not allowed to call this interface + */ +void DlogInnerForC(int moduleId, int level, const char *fmt, ...); +void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); + +#ifdef __cplusplus +} +#endif +#endif // LOG_CPP #endif // D_SYSLOG_H_
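For reference, a minimal usage sketch of the LOG_CPP C interface added above; the module id (CCE, cited only as an example in the header comments) and the log message are illustrative assumptions, not part of the patch:

    // Attach pid/device info once, then log through the ForC macros.
    LogAttr log_attr = {};
    log_attr.type = APPLICATION;  // ProcessType value defined above
    log_attr.pid = 1234;          // must be larger than 0 per DlogSetAttrForC's contract
    log_attr.deviceId = 0;
    (void) DlogSetAttrForC(log_attr);
    // DlogForC itself calls CheckLogLevelForC before formatting the message.
    DlogForC(CCE, DLOG_INFO, "engine initialized, device %u", log_attr.deviceId);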