From 94facf5ebe5e5b222fa06b3fdb4aa1db54215da1 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 19 Nov 2020 17:22:36 +0800 Subject: [PATCH] Synchronize latest Ascend software suite 19 Nov 2020 --- inc/external/ge/ge_api.h | 19 ++ inc/external/ge/ge_api_types.h | 37 ++- inc/external/graph/ascend_string.h | 12 + inc/external/register/register.h | 4 + inc/framework/common/debug/log.h | 27 ++ inc/framework/common/types.h | 1 + inc/framework/executor/ge_executor.h | 2 + inc/graph/debug/ge_attr_define.h | 3 + src/common/graph/CMakeLists.txt | 1 + src/common/graph/ascend_string.cc | 62 +++++ src/common/graph/format_refiner.cc | 7 +- src/common/graph/ge_attr_define.cc | 3 + src/common/graph/graph.mk | 20 +- src/common/graph/op_desc.cc | 14 + src/common/graph/operator.cc | 5 +- src/common/graph/shape_refiner.cc | 14 +- src/common/graph/transformer/inc/axis_util.h | 144 +++++++++++ .../inc/transfer_shape_according_to_format.h | 122 +++++++++ .../graph/transformer/src/axis_util.cpp | 198 ++++++++++++++ .../transfer_shape_according_to_format.cpp | 242 ++++++++++++++++++ src/common/graph/utils/transformer_utils.cc | 160 ++++++++++++ src/common/graph/utils/transformer_utils.h | 50 ++++ src/ge/client/ge_api.cc | 27 ++ .../format_transfers/datatype_transfer.cc | 25 +- .../format_transfer_c1hwncoc0_hwcn.cc | 18 +- .../format_transfer_dhwcn_fracz3D.cc | 6 +- ...format_transfer_dhwnc_fracz3D_transpose.cc | 6 +- .../format_transfer_fractal_nz.cc | 18 +- .../format_transfer_fractal_z.cc | 12 +- .../format_transfer_fractal_zz.cc | 19 +- .../format_transfer_fracz_hwcn.cc | 13 +- .../format_transfer_fracz_nchw.cc | 8 +- .../format_transfer_fracz_nhwc.cc | 8 +- .../format_transfer_hwcn_c1hwncoc0.cc | 8 +- .../format_transfer_nc1hwc0_nchw.cc | 8 +- .../format_transfer_nc1hwc0_nhwc.cc | 8 +- .../format_transfer_nchw_fz_c04.cc | 6 +- .../format_transfer_nchw_nc1hwc0.cc | 8 +- .../format_transfer_nhwc_nc1hwc0.cc | 8 +- .../format_transfer_transpose.cc | 42 +-- 
src/ge/common/formats/formats.cc | 22 +- .../formats/utils/formats_trans_utils.cc | 39 ++- .../formats/utils/formats_trans_utils.h | 5 + src/ge/common/ge_common.mk | 13 +- src/ge/common/profiling/profiling_manager.cc | 120 ++++++++- src/ge/common/profiling/profiling_manager.h | 21 +- src/ge/common/types.cc | 1 + src/ge/executor/ge_executor.cc | 13 + src/ge/executor/module.mk | 8 +- src/ge/ge_inference.mk | 27 +- src/ge/ge_local_engine/module.mk | 9 +- src/ge/ge_runner.mk | 19 +- src/ge/graph/build/label_allocator.cc | 31 ++- src/ge/graph/build/label_allocator.h | 2 +- .../graph/build/logical_stream_allocator.cc | 6 +- .../graph/build/memory/block_mem_assigner.cc | 9 + src/ge/graph/build/memory/module.mk | 9 +- src/ge/graph/build/model_builder.cc | 31 +-- src/ge/graph/common/transop_util.cc | 9 + src/ge/graph/common/transop_util.h | 2 + src/ge/graph/label/label_maker.cc | 151 ++++------- src/ge/graph/label/label_maker.h | 5 +- .../load/new_model_manager/data_dumper.h | 1 + .../load/new_model_manager/davinci_model.cc | 161 ++++++------ .../load/new_model_manager/davinci_model.h | 5 +- .../load/new_model_manager/model_manager.cc | 101 +++++++- .../load/new_model_manager/model_manager.h | 4 + .../load/new_model_manager/zero_copy_task.cc | 2 +- .../load/new_model_manager/zero_copy_task.h | 2 +- src/ge/graph/manager/graph_manager.cc | 120 ++++++++- src/ge/graph/manager/graph_manager.h | 10 + src/ge/graph/manager/rdma_pool_allocator.cc | 2 +- .../optimize/mem_rw_conflict_optimize.cc | 5 + src/ge/graph/partition/graph_partition.cc | 19 +- src/ge/graph/partition/graph_partition.h | 2 + src/ge/graph/passes/mark_agnostic_pass.cc | 9 +- src/ge/graph/passes/memcpy_addr_async_pass.cc | 30 ++- src/ge/graph/passes/merge_pass.cc | 30 +++ src/ge/graph/passes/merge_pass.h | 1 + src/ge/graph/passes/net_output_pass.cc | 8 +- src/ge/graph/passes/net_output_pass.h | 1 + src/ge/graph/passes/next_iteration_pass.cc | 11 +- src/ge/graph/preprocess/graph_preprocess.cc | 65 ++++- 
.../graph/preprocess/insert_op/ge_aipp_op.cc | 185 +++++++------ .../insert_op/util_insert_aipp_op.cc | 71 ++--- .../insert_op/util_insert_aipp_op.h | 1 - .../preprocess/multi_batch_copy_graph.cc | 9 + .../graph/preprocess/multi_batch_options.cc | 4 + src/ge/host_cpu_engine/module.mk | 9 +- src/ge/host_kernels/slice_kernel.cc | 4 +- src/ge/hybrid/common/npu_memory_allocator.cc | 18 +- .../executor/worker/execution_engine.cc | 10 +- src/ge/hybrid/model/hybrid_model_builder.cc | 43 +++- src/ge/hybrid/model/hybrid_model_builder.h | 1 + src/ge/hybrid/model/node_item.cc | 9 +- src/ge/hybrid/model/node_item.h | 3 + .../aicore/aicore_node_executor.cc | 58 +++-- .../aicore/aicore_node_executor.h | 36 ++- .../aicore/aicore_task_compiler.cc | 16 +- .../aicore/aicore_task_compiler.h | 8 +- .../controlop/control_op_executor.cc | 4 +- .../node_executor/hccl/hccl_node_executor.cc | 15 +- src/ge/hybrid/node_executor/node_executor.cc | 2 +- src/ge/hybrid/node_executor/task_context.cc | 20 +- src/ge/init/gelib.cc | 6 + src/ge/ir_build/ge_ir_build.cc | 46 ++++ src/ge/plugin/engine/module.mk | 10 +- src/ge/session/inner_session.cc | 18 ++ src/ge/session/inner_session.h | 2 + src/ge/session/omg.cc | 4 + src/ge/session/session_manager.cc | 30 +++ src/ge/session/session_manager.h | 14 +- src/ge/single_op/single_op.cc | 6 +- src/ge/single_op/task/op_task.cc | 39 ++- src/ge/single_op/task/op_task.h | 6 +- .../fwkacllib/inc/mmpa/sub_inc/mmpa_win.h | 5 - third_party/fwkacllib/inc/runtime/base.h | 10 +- third_party/fwkacllib/inc/runtime/dev.h | 4 +- third_party/fwkacllib/inc/runtime/event.h | 12 +- third_party/fwkacllib/inc/runtime/mem.h | 2 +- third_party/fwkacllib/inc/runtime/rt_model.h | 3 +- third_party/fwkacllib/inc/runtime/stream.h | 12 +- .../fwkacllib/inc/toolchain/prof_acl_api.h | 241 ++++++++++++++++- 123 files changed, 2833 insertions(+), 699 deletions(-) mode change 100755 => 100644 src/common/graph/CMakeLists.txt create mode 100644 src/common/graph/transformer/inc/axis_util.h 
create mode 100644 src/common/graph/transformer/inc/transfer_shape_according_to_format.h create mode 100644 src/common/graph/transformer/src/axis_util.cpp create mode 100644 src/common/graph/transformer/src/transfer_shape_according_to_format.cpp create mode 100644 src/common/graph/utils/transformer_utils.cc create mode 100644 src/common/graph/utils/transformer_utils.h diff --git a/inc/external/ge/ge_api.h b/inc/external/ge/ge_api.h index b4b9bb2a..8fd4b944 100644 --- a/inc/external/ge/ge_api.h +++ b/inc/external/ge/ge_api.h @@ -59,6 +59,25 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session { /// Status AddGraph(uint32_t graphId, const Graph &graph, const std::map &options); + /// + /// @ingroup client + /// @brief add a copy graph with a specific graphId + /// @param [in] graphId graph id + /// @param [in] graph the graph + /// @return Status result of function + /// + Status AddGraphWithCopy(uint32_t graph_id, const Graph &graph); + + /// + /// @ingroup client + /// @brief add a copy graph with a specific graphId and graphOptions + /// @param [in] graphId graph id + /// @param [in] graph the graph + /// @param [in] options graph options + /// @return Status result of function + /// + Status AddGraphWithCopy(uint32_t graph_id, const Graph &graph, const std::map &options); + /// /// @ingroup ge_graph /// @brief remove a graph of the session with specific session id diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 9c51bac0..37e2dccf 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -245,6 +245,12 @@ const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; // 0: close debug; 1: open TBE compiler; 2: open ccec compiler const std::string OP_DEBUG_LEVEL = "ge.opDebugLevel"; +// Configure model bank path +const std::string MDL_BANK_PATH_FLAG = "ge.mdl_bank_path"; + +// Configure op bank path +const std::string OP_BANK_PATH_FLAG = "ge.op_bank_path"; + // Graph run mode enum GraphRunMode 
{ PREDICTION = 0, TRAIN }; @@ -315,13 +321,28 @@ static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c static const char *const DEBUG_DIR = ge::DEBUG_DIR; static const char *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR; static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; +static const char *const MDL_BANK_PATH_FLAG = ge::MDL_BANK_PATH_FLAG.c_str(); +static const char *const OP_BANK_PATH_FLAG = ge::OP_BANK_PATH_FLAG.c_str(); + // for interface: aclgrphBuildModel -const std::set ir_builder_suppported_options = { - INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, - DYNAMIC_BATCH_SIZE, DYNAMIC_IMAGE_SIZE, DYNAMIC_DIMS, - INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, - AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, - INPUT_FP16_NODES, LOG_LEVEL}; +const std::set ir_builder_suppported_options = {INPUT_FORMAT, + INPUT_SHAPE, + OP_NAME_MAP, + DYNAMIC_BATCH_SIZE, + DYNAMIC_IMAGE_SIZE, + DYNAMIC_DIMS, + INSERT_OP_FILE, + PRECISION_MODE, + EXEC_DISABLE_REUSED_MEMORY, + AUTO_TUNE_MODE, + OUTPUT_TYPE, + OUT_NODES, + INPUT_FP16_NODES, + LOG_LEVEL, + OP_DEBUG_LEVEL, + DEBUG_DIR, + OP_COMPILER_CACHE_DIR, + OP_COMPILER_CACHE_MODE}; // for interface: aclgrphParse const std::set ir_parser_suppported_options = {INPUT_FORMAT, @@ -336,7 +357,9 @@ const std::set ir_parser_suppported_options = {INPUT_FORMAT, OUT_NODES, COMPRESS_WEIGHT_CONF, ENABLE_SCOPE_FUSION_PASSES, - LOG_LEVEL}; + LOG_LEVEL, + MDL_BANK_PATH_FLAG, + OP_BANK_PATH_FLAG}; // for interface: aclgrphBuildInitialize const std::set global_options = {CORE_TYPE, diff --git a/inc/external/graph/ascend_string.h b/inc/external/graph/ascend_string.h index 04bf31ac..7c4c74b3 100644 --- a/inc/external/graph/ascend_string.h +++ b/inc/external/graph/ascend_string.h @@ -31,6 +31,18 @@ class AscendString { const char* GetString() const; + bool operator<(const AscendString& d) const; + + bool operator>(const AscendString& d) const; + + bool operator<=(const AscendString& d) 
const; + + bool operator>=(const AscendString& d) const; + + bool operator==(const AscendString& d) const; + + bool operator!=(const AscendString& d) const; + private: std::shared_ptr name_; }; diff --git a/inc/external/register/register.h b/inc/external/register/register.h index f9056171..74b282c7 100644 --- a/inc/external/register/register.h +++ b/inc/external/register/register.h @@ -94,6 +94,7 @@ using FusionParseParamFunc = std::function, ge::Operator &)>; using FusionParseParamByOpFunc = std::function &, ge::Operator &)>; using ParseSubgraphFunc = std::function; +using ParseOpToGraphFunc = std::function; class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { public: @@ -125,6 +126,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { OpRegistrationData &InputReorderVector(const vector &input_order); + OpRegistrationData &ParseOpToGraphFn(const ParseOpToGraphFunc &parse_op_to_graph_fn); + domi::ImplyType GetImplyType() const; std::string GetOmOptype() const; std::set GetOriginOpTypeSet() const; @@ -134,6 +137,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { FusionParseParamFunc GetFusionParseParamFn() const; FusionParseParamByOpFunc GetFusionParseParamByOpFn() const; ParseSubgraphFunc GetParseSubgraphPostFn() const; + ParseOpToGraphFunc GetParseOpToGraphFn() const; private: std::shared_ptr impl_; diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index dbf22ead..249271a6 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -18,10 +18,12 @@ #define INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ #include +#include #include "runtime/rt.h" #include "common/string_util.h" #include "common/util.h" +#include "common/util/error_manager/error_manager.h" #include "framework/common/debug/ge_log.h" #include "ge/ge_api_error_codes.h" @@ -253,4 +255,29 @@ exec_expr1; \ } +#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ + { \ + 
GELOGE(_status, "%s", errormsg); \ + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ + } + +#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ + do { \ + bool b = (expr); \ + if (!b) { \ + GELOGE(_status, "%s", errormsg); \ + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ + return _status; \ + } \ + } while (0) + +template +std::string FmtToStr(const T &t) { + std::string fmt; + std::stringstream st; + st << "[" << t << "]"; + fmt = st.str(); + return fmt; +} + #endif // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index e5043d94..038b1cf6 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -70,6 +70,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFIL FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_STOP_VALUE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::map PROFILE_COMPONENT_MAP; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_CONFIG; +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 6e82bb96..17dbf928 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -270,6 +270,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { static ge::Status ReleaseSingleOpResource(void *stream); + static ge::Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); + ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t 
index, OriginInputInfo &orig_input_info); ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h index a76008a3..b85844d6 100644 --- a/inc/graph/debug/ge_attr_define.h +++ b/inc/graph/debug/ge_attr_define.h @@ -1115,6 +1115,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYN GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_DATATYPE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_FORMAT; +// atc user def dtype&format +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_OUTPUT_NODES; + // for fusion op plugin GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE; diff --git a/src/common/graph/CMakeLists.txt b/src/common/graph/CMakeLists.txt old mode 100755 new mode 100644 index bb63eb81..1c8a26b4 --- a/src/common/graph/CMakeLists.txt +++ b/src/common/graph/CMakeLists.txt @@ -42,6 +42,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "detail/*.cc" "debug/*.cc" "option/*.cc" + "transformer/src/*cc" ) # include directories diff --git a/src/common/graph/ascend_string.cc b/src/common/graph/ascend_string.cc index 597b634f..b1616856 100644 --- a/src/common/graph/ascend_string.cc +++ b/src/common/graph/ascend_string.cc @@ -30,4 +30,66 @@ const char* AscendString::GetString() const { return (*name_).c_str(); } + +bool AscendString::operator<(const AscendString& d) const { + if (name_ == nullptr && d.name_ == nullptr) { + return false; + } else if (name_ == nullptr) { + return true; + } else if (d.name_ == nullptr) { + return false; + } + return (*name_ < *(d.name_)); +} + +bool AscendString::operator>(const AscendString& d) const { + if (name_ == nullptr && d.name_ == nullptr) { + return false; + } else if (name_ == nullptr) { + 
return false; + } else if (d.name_ == nullptr) { + return true; + } + return (*name_ > *(d.name_)); +} + +bool AscendString::operator==(const AscendString& d) const { + if (name_ == nullptr && d.name_ == nullptr) { + return true; + } else if (name_ == nullptr) { + return false; + } else if (d.name_ == nullptr) { + return false; + } + return (*name_ == *(d.name_)); +} + +bool AscendString::operator<=(const AscendString& d) const { + if (name_ == nullptr) { + return true; + } else if (d.name_ == nullptr) { + return false; + } + return (*name_ <= *(d.name_)); +} + +bool AscendString::operator>=(const AscendString& d) const { + if (d.name_ == nullptr) { + return true; + } else if (name_ == nullptr) { + return false; + } + return (*name_ >= *(d.name_)); +} + +bool AscendString::operator!=(const AscendString& d) const { + if (name_ == nullptr && d.name_ == nullptr) { + return false; + } else if (name_ == nullptr) { + return true; + } else if (d.name_ == nullptr) { + return true; + } + return (*name_ != *(d.name_)); +} } // namespace ge diff --git a/src/common/graph/format_refiner.cc b/src/common/graph/format_refiner.cc index 9a072849..b29aa03b 100644 --- a/src/common/graph/format_refiner.cc +++ b/src/common/graph/format_refiner.cc @@ -384,12 +384,15 @@ void FormatRefiner::RefreshOriginFormatOfAnchor(std::vector &anchor continue; } for (const auto &input_desc : node->GetOpDesc()->GetAllInputsDescPtr()) { - if (input_desc != nullptr) { + // single op support private format set, its origin format should not be override + auto ori_format = input_desc->GetOriginFormat(); + if (input_desc != nullptr && (ori_format == FORMAT_ND || ori_format == FORMAT_RESERVED)) { input_desc->SetOriginFormat(input_desc->GetFormat()); } } for (const auto &output_desc : node->GetOpDesc()->GetAllOutputsDescPtr()) { - if (output_desc != nullptr) { + auto ori_format = output_desc->GetOriginFormat(); + if (output_desc != nullptr && (ori_format == FORMAT_ND || ori_format == FORMAT_RESERVED)) { 
output_desc->SetOriginFormat(output_desc->GetFormat()); } } diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc index 2cc447f8..bb4be1e1 100644 --- a/src/common/graph/ge_attr_define.cc +++ b/src/common/graph/ge_attr_define.cc @@ -1078,6 +1078,9 @@ const std::string ATTR_NAME_DYNAMIC_INPUT_END = "_dynamic_input_index_end"; const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type"; const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format"; +// atc user def dtype&format +const std::string ATTR_ATC_USER_DEFINE_OUTPUT_NODES = "_user_defined_output_nodes"; + // for fusion op plugin const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE = "_fusionop_original_type"; diff --git a/src/common/graph/graph.mk b/src/common/graph/graph.mk index 4221da38..14d05ee8 100644 --- a/src/common/graph/graph.mk +++ b/src/common/graph/graph.mk @@ -46,6 +46,10 @@ COMMON_LOCAL_SRC_FILES := \ option/ge_local_context.cc \ ./runtime_inference_context.cc \ ./utils/node_utils.cc \ + ../third_party/transformer/src/axis_util.cpp \ + ../third_party/transformer/src/transfer_shape_according_to_format.cpp \ + ./utils/transformer_utils.cc \ + COMMON_LOCAL_C_INCLUDES := \ proto/om.proto \ @@ -57,13 +61,19 @@ COMMON_LOCAL_C_INCLUDES := \ proto/op_mapping_info.proto \ proto/dump_task.proto \ inc \ + metadef/inc \ + graphengine/inc \ inc/external \ - inc/external/graph \ - inc/graph \ - inc/common \ - common \ - common/graph \ + metadef/inc/external \ + graphengine/inc/external \ + metadef/inc/external/graph \ + metadef/inc/graph \ + metadef/inc/common \ + metadef \ + metadef/graph \ third_party/protobuf/include \ + $(TOPDIR)metadef/third_party \ + $(TOPDIR)metadef/third_party/transformer/inc \ libc_sec/include \ ops/built-in/op_proto/inc \ cann/ops/built-in/op_proto/inc \ diff --git a/src/common/graph/op_desc.cc b/src/common/graph/op_desc.cc index d568d491..8202736d 100644 --- a/src/common/graph/op_desc.cc +++ b/src/common/graph/op_desc.cc @@ -27,6 
+27,7 @@ #include "graph/utils/attr_utils.h" #include "graph/utils/ge_ir_utils.h" #include "graph/utils/op_desc_utils.h" +#include "graph/utils/transformer_utils.h" #include "proto/ge_ir.pb.h" using std::make_pair; @@ -1301,11 +1302,24 @@ graphStatus OpDesc::CallInferFunc(Operator &op) { return GRAPH_PARAM_INVALID; } } + std::unique_ptr transformer(new (std::nothrow) NodeShapeTransUtils(shared_from_this())); + if (transformer == nullptr) { + GELOGE(GRAPH_FAILED, "Memory alloc failed"); + return GRAPH_FAILED; + } + if (!transformer->CatchFormatAndShape()) { + GELOGE(GRAPH_FAILED, "catch format and shape info failed!"); + return GRAPH_FAILED; + } graphStatus graph_status = (graphStatus)infer_func_(op); if (graph_status != GRAPH_SUCCESS) { GELOGE(GRAPH_FAILED, "%s call infer func. ret: %u", GetName().c_str(), graph_status); return GRAPH_FAILED; } + if (!transformer->UpdateFormatAndShape()) { + GELOGE(GRAPH_FAILED, "catch format and shape info failed!"); + return GRAPH_FAILED; + } return GRAPH_SUCCESS; } graphStatus OpDesc::CallInferFormatFunc(Operator &op) { diff --git a/src/common/graph/operator.cc b/src/common/graph/operator.cc index d196a3a7..b449725c 100644 --- a/src/common/graph/operator.cc +++ b/src/common/graph/operator.cc @@ -1425,7 +1425,10 @@ class GraphBuilderImpl { const string name = node->GetName(); for (auto &name_idx : op_impl->op_desc_->GetSubgraphNameIndexes()) { const SubgraphBuilder &builder = op_impl->GetSubgraphBuilder(name_idx.first); - GE_CHK_BOOL_EXEC(builder != nullptr, return GRAPH_FAILED, "Node: %s, Get builder failed.", name.c_str()); + if (builder == nullptr) { + GELOGW("Node: %s, Has no builder.", name.c_str()); + continue; + } Graph graph = builder(); // Build subgraph from user define builder. 
const ComputeGraphPtr &subgraph = GraphUtils::GetComputeGraph(graph); diff --git a/src/common/graph/shape_refiner.cc b/src/common/graph/shape_refiner.cc index e3643e7b..30cf8383 100644 --- a/src/common/graph/shape_refiner.cc +++ b/src/common/graph/shape_refiner.cc @@ -26,6 +26,7 @@ #include "debug/ge_log.h" #include "debug/ge_op_types.h" +#include "debug/ge_util.h" #include "external/graph/operator.h" #include "external/graph/operator_factory.h" #include "framework/common/debug/ge_log.h" @@ -41,7 +42,6 @@ const uint32_t kWhileBodySubGraphIdx = 1; graphStatus ReverseBrushWhileBodySubGraph(const ConstNodePtr &node) { GELOGD("Enter reverse brush while body subgraph process!"); - auto sub_graph_body = NodeUtils::GetSubgraph(*node, kWhileBodySubGraphIdx); if (sub_graph_body == nullptr) { GELOGE(GRAPH_FAILED, "Get while body graph failed!"); @@ -661,10 +661,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh if (!is_unknown_graph) { auto inference_context = CreateInferenceContext(context_map, node); - if (inference_context == nullptr) { - GELOGE(GRAPH_FAILED, "inference context is null"); - return GRAPH_FAILED; - } + GE_CHECK_NOTNULL(inference_context); GELOGD("create context for node:%s, marks %zu", node->GetName().c_str(), inference_context->GetMarks().size()); op.SetInferenceContext(inference_context); } @@ -678,8 +675,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh auto op_desc = node->GetOpDesc(); for (const auto &out_anchor : node->GetAllOutDataAnchors()) { auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); - ge::TensorUtils::SetRealDimCnt(*output_tensor, static_cast(output_tensor->GetShape().GetDims().size())); - output_tensor->SetOriginShape(output_tensor->GetShape()); + if (output_tensor->MutableShape().GetDims().empty()) { + output_tensor->SetOriginShape(output_tensor->GetShape()); + } + ge::TensorUtils::SetRealDimCnt(*output_tensor, + 
static_cast(output_tensor->GetOriginShape().GetDims().size())); output_tensor->SetOriginDataType(output_tensor->GetDataType()); GELOGD("node name is %s, origin shape is %ld, origin format is %s, origin data type is %s", diff --git a/src/common/graph/transformer/inc/axis_util.h b/src/common/graph/transformer/inc/axis_util.h new file mode 100644 index 00000000..c18c199a --- /dev/null +++ b/src/common/graph/transformer/inc/axis_util.h @@ -0,0 +1,144 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
+ * \file axis_util.h + * \brief get the axis value + */ +#ifndef COMMON_UTILS_TRANSFER_AXIS_UTIL_H_ +#define COMMON_UTILS_TRANSFER_AXIS_UTIL_H_ + +#include +#include +#include + +#include "external/graph/ge_error_codes.h" +#include "external/graph/types.h" +#include "framework/common/debug/ge_log.h" + +namespace common { +namespace transformer { + +const int32_t DIM_DEFAULT_SIZE = 4; +const uint32_t NCHW_DIMENSION_NUM = 4; + +const int32_t AXIS_NCHW_DIM_N = 0; +const int32_t AXIS_NCHW_DIM_C = 1; +const int32_t AXIS_NCHW_DIM_H = 2; +const int32_t AXIS_NCHW_DIM_W = 3; + +const int32_t AXIS_NHWC_DIM_N = 0; +const int32_t AXIS_NHWC_DIM_H = 1; +const int32_t AXIS_NHWC_DIM_W = 2; +const int32_t AXIS_NHWC_DIM_C = 3; + +const int32_t AXIS_NC1HWC0_DIM_N = 0; +const int32_t AXIS_NC1HWC0_DIM_C1 = 1; +const int32_t AXIS_NC1HWC0_DIM_C0 = 4; +const int32_t AXIS_NC1HWC0_DIM_H = 2; +const int32_t AXIS_NC1HWC0_DIM_W = 3; + +const int32_t AXIS_HWCN_DIM_H = 0; +const int32_t AXIS_HWCN_DIM_W = 1; +const int32_t AXIS_HWCN_DIM_C = 2; +const int32_t AXIS_HWCN_DIM_N = 3; + +const int32_t AXIS_C1HWNCoC0_DIM_C1 = 0; +const int32_t AXIS_C1HWNCoC0_DIM_H = 1; +const int32_t AXIS_C1HWNCoC0_DIM_W = 2; +const int32_t AXIS_C1HWNCoC0_DIM_N = 3; +const int32_t AXIS_C1HWNCoC0_DIM_Co = 4; +const int32_t AXIS_C1HWNCoC0_DIM_C0 = 5; + +#define CHECK_NOTNULL(val) \ + do { \ + if ((val) == nullptr) { \ + GELOGE(GRAPH_FAILED, "[ERROR]Parameter[%s] must not be null.", #val); \ + return false; \ + } \ + } while (0) + +#define CHECK(cond, log_func, return_expr) \ + do { \ + if (cond) { \ + log_func; \ + return_expr; \ + } \ + } while (0) + +enum AxisValueType { + AXIS_N = 0, + AXIS_C = 1, + AXIS_H = 2, + AXIS_W = 3, + AXIS_C1 = 4, + AXIS_C0 = 5, + AXIS_Co = 6, + AXIS_D = 7, + AXIS_BOTTOM = 8 +}; + +int64_t DivisionCeiling(int64_t dividend, int64_t divisor); + +/* Axis value is arranged as {N,C,H,W,C1,C0,...} */ +/* The first parameter is old shape's dimension, + * second is c0 and third is axis value. 
*/ +using GetAxisValueInfoByFormat = + std::function&, const uint32_t&, std::vector&, std::vector&)>; + +using GetAxisValueInfoByFormatPtr = std::shared_ptr; + +class AxisUtil { + public: + AxisUtil(); + ~AxisUtil(){}; + bool GetAxisValueByOriginFormat(const ge::Format& format, const std::vector& dimVec, const uint32_t& c0, + std::vector& axisValue, std::vector& ndValue); + bool HasAxisValueFunc(const ge::Format& format); + + private: + static bool CheckParams(const std::vector& originalDimVec, const uint32_t& c0, + std::vector& axisValue, std::vector& ndValue); + + static bool GetAxisValueByNCHW(const std::vector& originalDimVec, const uint32_t& c0, + std::vector& axisValue, std::vector& ndValue); + + static bool GetAxisValueByNHWC(const std::vector& originalDimVec, const uint32_t& c0, + std::vector& axisValue, std::vector& ndValue); + + static bool GetAxisValueByNC1HWC0(const std::vector& originalDimVec, const uint32_t& c0, + std::vector& axisValue, std::vector& ndValue); + + static bool GetAxisValueByFz(const std::vector& originalDimVec, const uint32_t& c0, + std::vector& axisValue, std::vector& ndValue); + + static bool GetAxisValueByHWCN(const std::vector& originalDimVec, const uint32_t& c0, + std::vector& axisValue, std::vector& ndValue); + + static bool GetAxisValueByND(const std::vector& originalDimVec, const uint32_t& c0, + std::vector& axisValue, std::vector& ndValue); + + static bool GetAxisValueByC1HWNCoC0(const std::vector& originalDimVec, const uint32_t& c0, + std::vector& axisValue, std::vector& ndValue); + + /* map of GetAxisValueInfoByFormat, get axis value by different original + * formats. 
*/ + std::map getAxisValueFuncMap; +}; +} // namespace transformer +} // namespace common + +#endif // COMMON_UTILS_TRANSFER_AXIS_UTIL_H_ diff --git a/src/common/graph/transformer/inc/transfer_shape_according_to_format.h b/src/common/graph/transformer/inc/transfer_shape_according_to_format.h new file mode 100644 index 00000000..7fd81476 --- /dev/null +++ b/src/common/graph/transformer/inc/transfer_shape_according_to_format.h @@ -0,0 +1,122 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
+ * \file transfer_shape_according_to_format.h + * \brief set shape according to original format and current format + */ +#ifndef COMMON_UTILS_TRANSFER_SHAPE_ACCORDING_TO_FORMAT_H_ +#define COMMON_UTILS_TRANSFER_SHAPE_ACCORDING_TO_FORMAT_H_ + +#include "transformer/inc/axis_util.h" + +#include +#include +#include + +#include "graph/types.h" +#include "graph/utils/op_desc_utils.h" + +namespace common { +namespace transformer { + +enum OpImplType { + EN_IMPL_CUSTOM_CONSTANT_CCE = 0, // custom constant op + EN_IMPL_CUSTOM_TIK, // custom tik op + EN_IMPL_CUSTOM_TBE, // custom tbe op + EN_IMPL_HW_CONSTANT_CCE, // Huawei built-in constant op + EN_IMPL_HW_GENERAL_CCE, // Huawei built-in cce op + EN_IMPL_HW_TIK, // Huawei built-in tik op + EN_IMPL_HW_TBE, // Huawei built-in tbe op + EN_IMPL_RL, // RL op + EN_IMPL_PLUGIN_TBE, // Huawei built-in tbe plugin op + EN_IMPL_VECTOR_CORE_HW_TBE, // Huawei built-in tbe op + EN_IMPL_VECTOR_CORE_CUSTOM_TBE, // custom tbe op + EN_IMPL_NON_PERSISTENT_CUSTOM_TBE, // custom tbe op + EN_RESERVED // reserved value +}; + +const uint32_t SHAPE_NUMBER_16 = 16; +const uint32_t SHAPE_NUMBER_32 = 32; +const uint32_t SHAPE_DIM_VALUE_C04 = 4; +const uint32_t NI = 16; +const uint32_t MINUS_VALUE_ONE = 1; +const uint32_t MINUS_VALUE_TWO = 2; +const uint32_t SIZE_OF_CN = 2; +const uint32_t MINIMUM_NZ_SHAPE_DIM_NUM = 2; + +/* The first parameter is axis value, second is new shape and third is + * op implementation type. 
*/ +using GetNewShapeByAxisValueAndFormat = + std::function &, const int64_t &, vector &, vector &)>; + +using GetNewShapeByAxisValueAndFormatPtr = std::shared_ptr; + +struct ShapeAndFormatInfo { + const std::vector &oldShape; + std::vector &newShape; + const ge::Format &oldFormat; + const ge::Format &newFormat; + const ge::DataType ¤tDataType; + const int64_t &opImplType; +}; + +using ShapeAndFormat = struct ShapeAndFormatInfo; + +class ShapeTransferAccordingToFormat { + public: + ShapeTransferAccordingToFormat(); + + ~ShapeTransferAccordingToFormat(){}; + + ShapeTransferAccordingToFormat(const ShapeTransferAccordingToFormat &) = delete; + + ShapeTransferAccordingToFormat &operator=(const ShapeTransferAccordingToFormat &) = delete; + + bool GetShapeAccordingToFormat(ShapeAndFormat &inputAndOutputInfo, int64_t *c = nullptr); + + /* ----------Below is the function of getting new shape---------------------- */ + static bool GetNCHWShapeByAxisValue(vector &newShape, const int64_t &implType, + const vector &axisValue, const vector &ndValue); + + static bool GetNHWCShapeByAxisValue(vector &newShape, const int64_t &implType, + const vector &axisValue, const vector &ndValue); + + static bool GetNC1HWC0ShapeByAxisValue(vector &newShape, const int64_t &implType, + const vector &axisValue, const vector &ndValue); + + static bool GetFzShapeByAxisValue(vector &newShape, const int64_t &implType, + const vector &axisValue, const vector &ndValue); + + static bool GetHWCNShapeByAxisValue(vector &newShape, const int64_t &implType, + const vector &axisValue, const vector &ndValue); + + static bool GetC1HWNCoC0ShapeByAxisValue(vector &newShape, const int64_t &implType, + const vector &axisValue, const vector &ndValue); + + static bool GetNzShapeByAxisValue(vector &newShape, const int64_t &implType, + const vector &axisValue, const vector &ndValue); + + private: + /* map of GetAxisValueInfoByFormat, get axis value by different original + * formats. 
*/ + std::map getNewShapeFuncMap; + std::map mapOfDtypeAndC0; +}; +} // namespace transformer +} // namespace common + +#endif // COMMON_UTILS_TRANSFER_SHAPE_ACCORDING_TO_FORMAT_H_ diff --git a/src/common/graph/transformer/src/axis_util.cpp b/src/common/graph/transformer/src/axis_util.cpp new file mode 100644 index 00000000..164a6b99 --- /dev/null +++ b/src/common/graph/transformer/src/axis_util.cpp @@ -0,0 +1,198 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
+ * \file axis_util.cpp + * \brief get the axis value + */ +#include "transformer/inc/axis_util.h" +#include "graph/types.h" + +namespace common { +namespace transformer { +using namespace ge; +using namespace std; + +AxisUtil::AxisUtil() { + getAxisValueFuncMap = {{FORMAT_NCHW, std::make_shared(GetAxisValueByNCHW)}, + {FORMAT_NHWC, std::make_shared(GetAxisValueByNHWC)}, + {FORMAT_NC1HWC0, std::make_shared(GetAxisValueByNC1HWC0)}, + {FORMAT_HWCN, std::make_shared(GetAxisValueByHWCN)}, + {FORMAT_ND, std::make_shared(GetAxisValueByND)}, + {FORMAT_C1HWNCoC0, std::make_shared(GetAxisValueByC1HWNCoC0)}}; +} + +int64_t DivisionCeiling(int64_t dividend, int64_t divisor) { + if (divisor == 0) { + return 0; + } else { + return (dividend + divisor - 1) / divisor; + } +} + +bool AxisUtil::GetAxisValueByOriginFormat(const Format &format, const vector &dimVec, const uint32_t &c0, + vector &axisValue, vector &ndValue) { + auto iterGetAxisFunc = getAxisValueFuncMap.find(format); + if (iterGetAxisFunc == getAxisValueFuncMap.end()) { + GELOGI("Can not get axis value of old format %u!", format); + return false; + } + GetAxisValueInfoByFormatPtr getAxisFunc = iterGetAxisFunc->second; + CHECK_NOTNULL(getAxisFunc); + return (*getAxisFunc)(dimVec, c0, axisValue, ndValue); +} + +bool AxisUtil::HasAxisValueFunc(const Format &format) { + auto iterGetAxisFunc = getAxisValueFuncMap.find(format); + if (iterGetAxisFunc == getAxisValueFuncMap.end()) { + GELOGI("Can not get axis value of format %u!", format); + return false; + } + return true; +} + +bool AxisUtil::CheckParams(const vector &originalDimVec, const uint32_t &c0, vector &axisValue, + vector &ndValue) { + ndValue = originalDimVec; + auto dimSize = originalDimVec.size(); + if (dimSize < DIM_DEFAULT_SIZE) { + /* Before this funcion, we should call function PadDimensionTo4. 
*/ + GELOGI("Dimension size %zu is invalid.", dimSize); + return false; + } + if (c0 == 0) { + GELOGE(GRAPH_FAILED, "[ERROR]c0 is zero!"); + return false; + } + + return true; +} + +bool AxisUtil::GetAxisValueByND(const vector &originalDimVec, const uint32_t &c0, vector &axisValue, + vector &ndValue) { + CHECK(axisValue.empty(), GELOGI("AxisValue is empty!"), return true); + CHECK(originalDimVec.empty(), GELOGI("Original dim vector is empty!"), return true); + ndValue = originalDimVec; + /* To differentiate the input datatype of int8 and others */ + axisValue[AXIS_C0] = c0; + if (originalDimVec.size() == NCHW_DIMENSION_NUM) { + axisValue[AXIS_N] = originalDimVec[AXIS_NCHW_DIM_N]; + axisValue[AXIS_C] = originalDimVec[AXIS_NCHW_DIM_C]; + axisValue[AXIS_H] = originalDimVec[AXIS_NCHW_DIM_H]; + axisValue[AXIS_W] = originalDimVec[AXIS_NCHW_DIM_W]; + axisValue[AXIS_C1] = DivisionCeiling(originalDimVec[AXIS_NCHW_DIM_C], (int64_t)c0); + axisValue[AXIS_Co] = c0; + } + return true; +} + +bool AxisUtil::GetAxisValueByNCHW(const vector &originalDimVec, const uint32_t &c0, vector &axisValue, + vector &ndValue) { + CHECK(axisValue.empty(), GELOGI("AxisValue is empty!"), return true); + CHECK(originalDimVec.empty(), GELOGI("Original dim vector is empty!"), return true); + /* C0 Must be set for case ND or 2D-NCHW to NZ */ + axisValue[AXIS_C0] = c0; + CHECK(CheckParams(originalDimVec, c0, axisValue, ndValue) != true, GELOGE(GRAPH_FAILED,"[ERROR]Parameter is invalid!"), + return false); + + axisValue[AXIS_N] = originalDimVec[AXIS_NCHW_DIM_N]; + axisValue[AXIS_C] = originalDimVec[AXIS_NCHW_DIM_C]; + axisValue[AXIS_H] = originalDimVec[AXIS_NCHW_DIM_H]; + axisValue[AXIS_W] = originalDimVec[AXIS_NCHW_DIM_W]; + axisValue[AXIS_C1] = DivisionCeiling(originalDimVec[AXIS_NCHW_DIM_C], (int64_t)c0); + axisValue[AXIS_Co] = c0; + return true; +} + +bool AxisUtil::GetAxisValueByNHWC(const vector &originalDimVec, const uint32_t &c0, vector &axisValue, + vector &ndValue) { + CHECK(axisValue.empty(), 
GELOGI("AxisValue is empty!"), return true); + CHECK(originalDimVec.empty(), GELOGI("Original dim vector is empty!"), return true); + /* C0 Must be set for case ND or 2D-NHWC to NZ */ + axisValue[AXIS_C0] = c0; + CHECK(CheckParams(originalDimVec, c0, axisValue, ndValue) != true, GELOGE(GRAPH_FAILED, "[ERROR]Parameter is invalid!"), + return false); + + axisValue[AXIS_N] = originalDimVec[AXIS_NHWC_DIM_N]; + axisValue[AXIS_C] = originalDimVec[AXIS_NHWC_DIM_C]; + axisValue[AXIS_H] = originalDimVec[AXIS_NHWC_DIM_H]; + axisValue[AXIS_W] = originalDimVec[AXIS_NHWC_DIM_W]; + axisValue[AXIS_C1] = DivisionCeiling(originalDimVec[AXIS_NHWC_DIM_C], (int64_t)c0); + axisValue[AXIS_Co] = c0; + return true; +} + +bool AxisUtil::GetAxisValueByNC1HWC0(const vector &originalDimVec, const uint32_t &c0, + vector &axisValue, vector &ndValue) { + CHECK(axisValue.empty(), GELOGI("AxisValue is empty!"), return true); + CHECK(originalDimVec.empty(), GELOGI("Original dim vector is empty!"), return true); + CHECK(CheckParams(originalDimVec, c0, axisValue, ndValue) != true, GELOGE(GRAPH_FAILED,"[ERROR]Parameter is invalid!"), + return false); + + auto dimSize = originalDimVec.size(); + if (dimSize == DIM_DEFAULT_SIZE + 1) { + axisValue[AXIS_C1] = originalDimVec[AXIS_NC1HWC0_DIM_C1]; + axisValue[AXIS_C0] = originalDimVec[AXIS_NC1HWC0_DIM_C0]; + axisValue[AXIS_C] = axisValue[AXIS_C1] * axisValue[AXIS_C0]; + } else { + axisValue[AXIS_C1] = DivisionCeiling(originalDimVec[AXIS_NCHW_DIM_C], (int64_t)c0); + axisValue[AXIS_C0] = c0; + axisValue[AXIS_C] = originalDimVec[AXIS_NCHW_DIM_C]; + } + + axisValue[AXIS_N] = originalDimVec[AXIS_NCHW_DIM_N]; + axisValue[AXIS_H] = originalDimVec[AXIS_NCHW_DIM_H]; + axisValue[AXIS_W] = originalDimVec[AXIS_NCHW_DIM_W]; + return true; +} + +bool AxisUtil::GetAxisValueByHWCN(const vector &originalDimVec, const uint32_t &c0, vector &axisValue, + vector &ndValue) { + CHECK(axisValue.empty(), GELOGI("AxisValue is empty!"), return true); + CHECK(originalDimVec.empty(), 
GELOGI("Original dim vector is empty!"), return true); + /* C0 Must be set for case ND or 2D-NHWC to NZ */ + axisValue[AXIS_C0] = c0; + CHECK(CheckParams(originalDimVec, c0, axisValue, ndValue) != true, GELOGE(GRAPH_FAILED, "[ERROR]Parameter is invalid!"), + return false); + + axisValue[AXIS_N] = originalDimVec[AXIS_HWCN_DIM_N]; + axisValue[AXIS_C] = originalDimVec[AXIS_HWCN_DIM_C]; + axisValue[AXIS_H] = originalDimVec[AXIS_HWCN_DIM_H]; + axisValue[AXIS_W] = originalDimVec[AXIS_HWCN_DIM_W]; + axisValue[AXIS_C1] = DivisionCeiling(originalDimVec[AXIS_HWCN_DIM_C], (int64_t)c0); + axisValue[AXIS_Co] = c0; + return true; +} + +bool AxisUtil::GetAxisValueByC1HWNCoC0(const vector &originalDimVec, const uint32_t &c0, + vector &axisValue, vector &ndValue) { + CHECK(axisValue.empty(), GELOGI("AxisValue is empty!"), return true); + CHECK(originalDimVec.empty(), GELOGI("Original dim vector is empty!"), return true); + /* C0 Must be set for case ND or 2D-NHWC to NZ */ + axisValue[AXIS_C0] = c0; + CHECK(CheckParams(originalDimVec, c0, axisValue, ndValue) != true, GELOGE(GRAPH_FAILED, "[ERROR]Parameter is invalid!"), + return false); + + axisValue[AXIS_N] = originalDimVec[AXIS_C1HWNCoC0_DIM_N]; + axisValue[AXIS_C] = originalDimVec[AXIS_C1HWNCoC0_DIM_C1] * c0; + axisValue[AXIS_H] = originalDimVec[AXIS_C1HWNCoC0_DIM_H]; + axisValue[AXIS_W] = originalDimVec[AXIS_C1HWNCoC0_DIM_W]; + axisValue[AXIS_C1] = originalDimVec[AXIS_C1HWNCoC0_DIM_C1]; + axisValue[AXIS_Co] = originalDimVec[AXIS_C1HWNCoC0_DIM_Co]; + return true; +} +} // namespace transformer +} // namespace common diff --git a/src/common/graph/transformer/src/transfer_shape_according_to_format.cpp b/src/common/graph/transformer/src/transfer_shape_according_to_format.cpp new file mode 100644 index 00000000..eb57ad00 --- /dev/null +++ b/src/common/graph/transformer/src/transfer_shape_according_to_format.cpp @@ -0,0 +1,242 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 
2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file transfer_shape_according_to_format.cpp + * \brief set shape according to original format and current format + */ +#include "transformer/inc/transfer_shape_according_to_format.h" + +namespace common { +namespace transformer { +using namespace ge; +using namespace std; + +ShapeTransferAccordingToFormat::ShapeTransferAccordingToFormat(void) { + getNewShapeFuncMap = { + {ge::FORMAT_NCHW, std::make_shared(GetNCHWShapeByAxisValue)}, + {ge::FORMAT_NHWC, std::make_shared(GetNHWCShapeByAxisValue)}, + {ge::FORMAT_NC1HWC0, std::make_shared(GetNC1HWC0ShapeByAxisValue)}, + {ge::FORMAT_FRACTAL_Z, std::make_shared(GetFzShapeByAxisValue)}, + {ge::FORMAT_HWCN, std::make_shared(GetHWCNShapeByAxisValue)}, + {ge::FORMAT_C1HWNCoC0, std::make_shared(GetC1HWNCoC0ShapeByAxisValue)}, + {ge::FORMAT_FRACTAL_NZ, std::make_shared(GetNzShapeByAxisValue)}}; + + mapOfDtypeAndC0 = { + {ge::DT_FLOAT16, SHAPE_NUMBER_16}, {ge::DT_FLOAT, SHAPE_NUMBER_16}, {ge::DT_INT8, SHAPE_NUMBER_32}, + {ge::DT_INT16, SHAPE_NUMBER_16}, {ge::DT_INT32, SHAPE_NUMBER_16}, {ge::DT_INT64, SHAPE_NUMBER_16}, + {ge::DT_UINT8, SHAPE_NUMBER_16}, {ge::DT_UINT16, SHAPE_NUMBER_32}, {ge::DT_UINT32, SHAPE_NUMBER_16}, + {ge::DT_UINT64, SHAPE_NUMBER_16}, {ge::DT_BOOL, SHAPE_NUMBER_16}}; +} + +bool ShapeTransferAccordingToFormat::GetNCHWShapeByAxisValue(vector& newShape, const int64_t& implType, + const vector& axisValue, + const vector& ndValue) { + CHECK(axisValue.empty(), GELOGD("AxisValue is empty!"), 
return true); + /* axisValue is initialized as a size 6 vector. */ + newShape.push_back(axisValue[AXIS_N]); + newShape.push_back(axisValue[AXIS_C]); + newShape.push_back(axisValue[AXIS_H]); + newShape.push_back(axisValue[AXIS_W]); + return true; +} + +bool ShapeTransferAccordingToFormat::GetNHWCShapeByAxisValue(vector& newShape, const int64_t& implType, + const vector& axisValue, + const vector& ndValue) { + CHECK(axisValue.empty(), GELOGD("AxisValue is empty!"), return true); + /* axisValue is initialized as a size 6 vector. */ + newShape.push_back(axisValue[AXIS_N]); + newShape.push_back(axisValue[AXIS_H]); + newShape.push_back(axisValue[AXIS_W]); + newShape.push_back(axisValue[AXIS_C]); + return true; +} + +bool ShapeTransferAccordingToFormat::GetNC1HWC0ShapeByAxisValue(vector& newShape, const int64_t& implType, + const vector& axisValue, + const vector& ndValue) { + CHECK(axisValue.empty(), GELOGD("AxisValue is empty!"), return true); + /* axisValue is initialized as a size 6 vector. */ + if (implType == EN_IMPL_HW_TBE || implType == EN_IMPL_CUSTOM_TBE || implType == EN_IMPL_NON_PERSISTENT_CUSTOM_TBE) { + newShape.push_back(axisValue[AXIS_N]); + newShape.push_back(axisValue[AXIS_C1]); + newShape.push_back(axisValue[AXIS_H]); + newShape.push_back(axisValue[AXIS_W]); + newShape.push_back(axisValue[AXIS_C0]); + } else { + newShape.push_back(axisValue[AXIS_N]); + newShape.push_back(axisValue[AXIS_C]); + newShape.push_back(axisValue[AXIS_H]); + newShape.push_back(axisValue[AXIS_W]); + } + return true; +} + +bool ShapeTransferAccordingToFormat::GetFzShapeByAxisValue(vector& newShape, const int64_t& implType, + const vector& axisValue, + const vector& ndValue) { + CHECK(axisValue.empty(), GELOGD("AxisValue is empty!"), return true); + /* axisValue is initialized as a size 6 vector. 
*/ + if (ndValue.size() == SIZE_OF_CN) { + auto sizeOfOriginalVec = ndValue.size(); + newShape = ndValue; + /* sizeOfOriginalVec - 1 mean the last value of original vec + * sizeOfOriginalVec - 2 mean the second last value of original vec */ + newShape[sizeOfOriginalVec - MINUS_VALUE_ONE] = + DivisionCeiling(ndValue[sizeOfOriginalVec - MINUS_VALUE_ONE], SHAPE_NUMBER_16); + newShape[sizeOfOriginalVec - MINUS_VALUE_TWO] = + DivisionCeiling(ndValue[sizeOfOriginalVec - MINUS_VALUE_TWO], axisValue[AXIS_C0]); + newShape.push_back(SHAPE_NUMBER_16); + newShape.push_back(axisValue[AXIS_C0]); + } else { + if (implType == EN_IMPL_HW_TBE || implType == EN_IMPL_CUSTOM_TBE || implType == EN_IMPL_NON_PERSISTENT_CUSTOM_TBE) { + int64_t hwc1 = axisValue[AXIS_C1] * axisValue[AXIS_H] * axisValue[AXIS_W]; + newShape.push_back(hwc1); + newShape.push_back(DivisionCeiling(axisValue[AXIS_N], NI)); + newShape.push_back(NI); + newShape.push_back(axisValue[AXIS_C0]); + } else { + newShape.push_back(axisValue[AXIS_N]); + newShape.push_back(axisValue[AXIS_C]); + newShape.push_back(axisValue[AXIS_H]); + newShape.push_back(axisValue[AXIS_W]); + } + } + + return true; +} + +bool ShapeTransferAccordingToFormat::GetHWCNShapeByAxisValue(vector& newShape, const int64_t& implType, + const vector& axisValue, + const vector& ndValue) { + CHECK(axisValue.empty(), GELOGD("AxisValue is empty!"), return true); + /* axisValue is initialized as a size 6 vector. */ + newShape.push_back(axisValue[AXIS_H]); + newShape.push_back(axisValue[AXIS_W]); + newShape.push_back(axisValue[AXIS_C]); + newShape.push_back(axisValue[AXIS_N]); + return true; +} + +bool ShapeTransferAccordingToFormat::GetC1HWNCoC0ShapeByAxisValue(vector& newShape, const int64_t& implType, + const vector& axisValue, + const vector& ndValue) { + CHECK(axisValue.empty(), GELOGD("AxisValue is empty!"), return true); + /* axisValue is initialized as a size 6 vector. 
*/ + newShape.push_back(axisValue[AXIS_C1]); + newShape.push_back(axisValue[AXIS_H]); + newShape.push_back(axisValue[AXIS_W]); + newShape.push_back(axisValue[AXIS_N]); + newShape.push_back(axisValue[AXIS_Co]); + newShape.push_back(axisValue[AXIS_C0]); + return true; +} + +bool ShapeTransferAccordingToFormat::GetNzShapeByAxisValue(vector& newShape, const int64_t& implType, + const vector& axisValue, + const vector& ndValue) { + CHECK(ndValue.empty(), GELOGD("ndValue is empty!"), return true); + CHECK(axisValue.empty() || axisValue.size() <= AXIS_C0, + GELOGD("AxisValue is empty or its size %zu <= AXIS_C0[%u]", axisValue.size(), AXIS_C0), return true); + uint32_t sizeOfOriginalVec = ndValue.size(); + if (sizeOfOriginalVec < MINIMUM_NZ_SHAPE_DIM_NUM) { + GELOGD("ndValue's dim num is less than 2!"); + return true; + } + /* axisValue is initialized as a size 6 vector. */ + newShape = ndValue; + + /* sizeOfOriginalVec - 1 mean the last value of original vec + * sizeOfOriginalVec - 2 mean the second last value of original vec */ + newShape[sizeOfOriginalVec - MINUS_VALUE_ONE] = + DivisionCeiling(ndValue[sizeOfOriginalVec - MINUS_VALUE_TWO], (int64_t)SHAPE_NUMBER_16); + + newShape[sizeOfOriginalVec - MINUS_VALUE_TWO] = + DivisionCeiling(ndValue[sizeOfOriginalVec - MINUS_VALUE_ONE], axisValue[AXIS_C0]); + newShape.push_back(SHAPE_NUMBER_16); + newShape.push_back(axisValue[AXIS_C0]); + return true; +} + +bool ShapeTransferAccordingToFormat::GetShapeAccordingToFormat(ShapeAndFormat& shapeAndFormatInfo, int64_t* c) { + /* The default new shape is old shape */ + shapeAndFormatInfo.newShape = shapeAndFormatInfo.oldShape; + if (shapeAndFormatInfo.oldFormat >= ge::FORMAT_RESERVED || shapeAndFormatInfo.newFormat >= ge::FORMAT_RESERVED) { + GELOGE(GRAPH_FAILED, "Old format %u or new format %u is invalid!", shapeAndFormatInfo.oldFormat, + shapeAndFormatInfo.newFormat); + return false; + } + + if (shapeAndFormatInfo.currentDataType >= ge::DT_UNDEFINED) { + GELOGE(GRAPH_FAILED, 
"currentDataType %u is invalid!", shapeAndFormatInfo.currentDataType); + return false; + } + AxisUtil* axisutil_object = new AxisUtil(); + if (!axisutil_object->HasAxisValueFunc(shapeAndFormatInfo.oldFormat)) { + delete axisutil_object; + return true; + } + + auto iterGetNewShapeFunc = getNewShapeFuncMap.find(shapeAndFormatInfo.newFormat); + if (iterGetNewShapeFunc == getNewShapeFuncMap.end()) { + GELOGD("Can not get new shape of new format %u!", shapeAndFormatInfo.newFormat); + delete axisutil_object; + return true; + } + GELOGD("Original format %u, new format %u", shapeAndFormatInfo.oldFormat, shapeAndFormatInfo.newFormat); + GetNewShapeByAxisValueAndFormatPtr getNewShapeFunc = iterGetNewShapeFunc->second; + CHECK_NOTNULL(getNewShapeFunc); + std::vector axisValue; + for (uint32_t i = 0; i < AXIS_BOTTOM; i++) { + axisValue.push_back(1); + } + std::vector ndValue; + uint32_t c0; + if (mapOfDtypeAndC0.empty()) { + c0 = SHAPE_NUMBER_16; + } else { + auto iterGetC0 = mapOfDtypeAndC0.find(shapeAndFormatInfo.currentDataType); + if (iterGetC0 == mapOfDtypeAndC0.end()) { + GELOGE(GRAPH_FAILED, "Dtype is not support."); + delete axisutil_object; + return true; + } + c0 = iterGetC0->second; + } + + // The value of C0 should be 4 while format is 5HD-4 or FRAZ-4 + if (shapeAndFormatInfo.newFormat == ge::FORMAT_NC1HWC0_C04) { + c0 = SHAPE_DIM_VALUE_C04; + } + + bool status = axisutil_object->GetAxisValueByOriginFormat( + shapeAndFormatInfo.oldFormat, shapeAndFormatInfo.oldShape, c0, axisValue, ndValue); + if (status != true && shapeAndFormatInfo.newFormat != ge::FORMAT_FRACTAL_NZ) { + delete axisutil_object; + return true; + } + delete axisutil_object; + + shapeAndFormatInfo.newShape.clear(); + (*getNewShapeFunc)(shapeAndFormatInfo.newShape, shapeAndFormatInfo.opImplType, axisValue, ndValue); + if (c != nullptr) { + *c = axisValue[AXIS_C]; + } + return true; +} +} // namespace transformer +} // namespace common diff --git a/src/common/graph/utils/transformer_utils.cc 
b/src/common/graph/utils/transformer_utils.cc new file mode 100644 index 00000000..f31ad69d --- /dev/null +++ b/src/common/graph/utils/transformer_utils.cc @@ -0,0 +1,160 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "transformer_utils.h" + +#include "external/ge/ge_api_types.h" +#include "framework/common/debug/ge_log.h" +#include "graph/utils/type_utils.h" + +namespace ge { +bool NodeShapeTransUtils::CatchFormatAndShape() { + inputs_ = op_desc_->GetAllInputName(); + outputs_ = op_desc_->GetAllOutputName(); + + for (auto &ele : inputs_) { + auto tensor_desc_input = op_desc_->MutableInputDesc(ele.first); + if (tensor_desc_input == nullptr) { + continue; + } + auto format = tensor_desc_input->GetFormat(); + auto ori_format = tensor_desc_input->GetOriginFormat(); + if (format == ori_format) { + GELOGD("Node is %s, input tensor name is %s. ori format: %s, format: %s is same! 
No need to catch format&shape!", + op_desc_->GetName().c_str(), ele.first.c_str(), TypeUtils::FormatToSerialString(ori_format).c_str(), + TypeUtils::FormatToSerialString(format).c_str()); + continue; + } + map_format_in_.insert(std::pair(ele.first, format)); + map_ori_format_in_.insert(std::pair(ele.first, ori_format)); + map_dtype_in_.insert(std::pair(ele.first, tensor_desc_input->GetDataType())); + tensor_desc_input->SetFormat(ori_format); + tensor_desc_input->SetShape(tensor_desc_input->GetOriginShape()); + } + + for (auto &ele : outputs_) { + auto tensor_desc_output = op_desc_->MutableOutputDesc(ele.first); + if (tensor_desc_output == nullptr) { + continue; + } + auto format = tensor_desc_output->GetFormat(); + auto ori_format = tensor_desc_output->GetOriginFormat(); + if (format == ori_format) { + GELOGD("Node is %s, output tensor name is %s. ori format: %s, format: %s is same! No need to catch format&shape!", + op_desc_->GetName().c_str(), ele.first.c_str(), TypeUtils::FormatToSerialString(ori_format).c_str(), + TypeUtils::FormatToSerialString(format).c_str()); + continue; + } + map_format_out_.insert(std::pair(ele.first, format)); + map_ori_format_out_.insert(std::pair(ele.first, ori_format)); + map_dtype_out_.insert(std::pair(ele.first, tensor_desc_output->GetDataType())); + + if (format == ori_format) { + continue; + } + tensor_desc_output->SetFormat(ori_format); + } + + return true; +} + +bool NodeShapeTransUtils::UpdateFormatAndShape() { + for (auto &ele : inputs_) { + auto tensor_desc_input = op_desc_->MutableInputDesc(ele.first); + if (tensor_desc_input == nullptr) { + continue; + } + // if can not find saved info, it says format and origin format is same when catched + if (map_format_in_.find(ele.first) == map_format_in_.end()) { + GELOGD("Node is [%s], input tensor name [%s] is not been catched.Skip update action for it!", + op_desc_->GetName().c_str(), ele.first.c_str()); + tensor_desc_input->SetOriginFormat(tensor_desc_input->GetFormat()); + 
tensor_desc_input->SetOriginShape(tensor_desc_input->GetShape()); + continue; + } + auto ori_format = tensor_desc_input->GetFormat(); + auto ori_shape = tensor_desc_input->GetShape(); + auto curr_format = map_format_in_[ele.first]; + if (ori_format == curr_format) { + continue; + } + std::unique_ptr shape_transfer( + new (std::nothrow) common::transformer::ShapeTransferAccordingToFormat()); + if (shape_transfer == nullptr) { + GELOGE(GRAPH_FAILED, "Memory alloc failed"); + return false; + } + std::vector ori_shape_dims = ori_shape.GetDims(); + std::vector out_dims; + ge::DataType dtype = map_dtype_in_[ele.first]; + common::transformer::ShapeAndFormat shape_and_format_info{ + ori_shape_dims, out_dims, ori_format, curr_format, dtype, common::transformer::EN_IMPL_CUSTOM_TBE}; + shape_transfer->GetShapeAccordingToFormat(shape_and_format_info); + tensor_desc_input->SetFormat(curr_format); + tensor_desc_input->SetShape(GeShape(out_dims)); + } + + for (auto &ele : outputs_) { + auto tensor_desc_output = op_desc_->MutableOutputDesc(ele.first); + if (tensor_desc_output == nullptr) { + continue; + } + // if can not find saved info, it says format and origin format is same when catched + if (map_ori_format_out_.find(ele.first) == map_ori_format_out_.end()) { + GELOGD("Node is [%s], input tensor name [%s] is not been catched.Skip update action for it!", + op_desc_->GetName().c_str(), ele.first.c_str()); + tensor_desc_output->SetOriginFormat(tensor_desc_output->GetFormat()); + tensor_desc_output->SetOriginShape(tensor_desc_output->GetShape()); + continue; + } + auto ori_shape = tensor_desc_output->GetShape(); + auto curr_format = tensor_desc_output->GetFormat(); + if (curr_format != map_ori_format_out_[ele.first]) { + GELOGE(GRAPH_FAILED, "Node is %s, out tensor name is %s. 
format: %s, recorded origin format: %s is not same", + op_desc_->GetName().c_str(), ele.first.c_str(), TypeUtils::FormatToSerialString(curr_format).c_str(), + TypeUtils::FormatToSerialString(map_ori_format_out_[ele.first]).c_str()); + return GRAPH_FAILED; + } + tensor_desc_output->SetOriginShape(ori_shape); + auto saved_format = map_format_out_[ele.first]; + if (curr_format == saved_format) { + GELOGD("Nodeis %s, out tensor name is %s. ori format: %s, recorded format: %s is same! No need to transfer", + op_desc_->GetName().c_str(), ele.first.c_str(), TypeUtils::FormatToSerialString(curr_format).c_str(), + TypeUtils::FormatToSerialString(saved_format).c_str()); + continue; + } + tensor_desc_output->SetFormat(saved_format); + std::unique_ptr shape_transfer( + new (std::nothrow) common::transformer::ShapeTransferAccordingToFormat()); + if (shape_transfer == nullptr) { + GELOGE(GRAPH_FAILED, "Memory alloc failed"); + return false; + } + std::vector ori_shape_dims = ori_shape.GetDims(); + std::vector out_dims; + ge::DataType dtype = tensor_desc_output->GetDataType(); + common::transformer::ShapeAndFormat shape_and_format_info{ + ori_shape_dims, out_dims, curr_format, saved_format, dtype, common::transformer::EN_IMPL_CUSTOM_TBE}; + shape_transfer->GetShapeAccordingToFormat(shape_and_format_info); + tensor_desc_output->SetShape(GeShape(out_dims)); + GELOGD("Node is %s, out tensor name is %s. Update format and shape success,ori format: %s, format: %s", + op_desc_->GetName().c_str(), ele.first.c_str(), TypeUtils::FormatToSerialString(curr_format).c_str(), + TypeUtils::FormatToSerialString(saved_format).c_str()); + } + GELOGD("Node is %s. 
Update format and shape success", op_desc_->GetName().c_str()); + return true; +} +} // namespace ge \ No newline at end of file diff --git a/src/common/graph/utils/transformer_utils.h b/src/common/graph/utils/transformer_utils.h new file mode 100644 index 00000000..6595eeed --- /dev/null +++ b/src/common/graph/utils/transformer_utils.h @@ -0,0 +1,50 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef COMMON_GRAPH_UTILS_TRANSFORMER_UTILS_H_ +#define COMMON_GRAPH_UTILS_TRANSFORMER_UTILS_H_ +#include +#include + +#include "external/graph/types.h" +#include "graph/op_desc.h" +#include "graph/ge_tensor.h" +#include "transformer/inc/transfer_shape_according_to_format.h" + +namespace ge { +class NodeShapeTransUtils { + public: + bool CatchFormatAndShape(); + bool UpdateFormatAndShape(); + + explicit NodeShapeTransUtils(OpDescPtr op_desc) : op_desc_(op_desc) {} + + ~NodeShapeTransUtils() {} + + private: + std::map map_format_in_; + std::map map_ori_format_in_; + std::map map_dtype_in_; + std::map map_format_out_; + std::map map_ori_format_out_; + std::map map_dtype_out_; + std::map inputs_; + std::map outputs_; + + OpDescPtr op_desc_; +}; +} // namespace ge +#endif // COMMON_GRAPH_UTILS_TRANSFORMER_UTILS_H_ \ No newline at end of file diff --git a/src/ge/client/ge_api.cc b/src/ge/client/ge_api.cc index 7c4cf9c8..a0cf22e7 100644 --- a/src/ge/client/ge_api.cc +++ b/src/ge/client/ge_api.cc @@ 
-260,6 +260,33 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map options; + return AddGraphWithCopy(graph_id, graph, options); +} + +Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph, + const std::map &options) { + GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); + return FAILED; + } + std::map str_options; + for (auto it = options.begin(); it != options.end(); ++it) { + str_options.insert({it->first.GetString(), it->second.GetString()}); + } + GELOGD("Adding graph to session"); + Status ret = instance_ptr->SessionManagerObj().AddGraphWithCopy(sessionId_, graph_id, graph, str_options); + if (ret != SUCCESS) { + GELOGE(ret, "AddGraph failed in Session."); + return FAILED; + } + GELOGD("AddGraph finished in Session."); + return ret; +} + Status Session::RemoveGraph(uint32_t graph_id) { GELOGT(TRACE_INIT, "Session RemoveGraph start"); diff --git a/src/ge/common/formats/format_transfers/datatype_transfer.cc b/src/ge/common/formats/format_transfers/datatype_transfer.cc index a603b2f4..a3bf6da3 100644 --- a/src/ge/common/formats/format_transfers/datatype_transfer.cc +++ b/src/ge/common/formats/format_transfers/datatype_transfer.cc @@ -24,6 +24,7 @@ #include "common/fp16_t.h" #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "graph/utils/type_utils.h" #include "securec.h" @@ -123,21 +124,25 @@ Status DataTypeTransfer::TransDataType(const CastArgs &args, TransResult &result std::pair trans_info(args.src_data_type, args.dst_data_type); auto iter = trans_mode_map.find(trans_info); if (iter == trans_mode_map.end()) { - GELOGE(PARAM_INVALID, "Trans data type from %s to %s is not supported.", - 
TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), - TypeUtils::DataTypeToSerialString(args.dst_data_type).c_str()); + std::string error = "Failed to trans data from datatype " + + FmtToStr(TypeUtils::DataTypeToSerialString(args.src_data_type)) + " to " + + FmtToStr(TypeUtils::DataTypeToSerialString(args.dst_data_type)) + " , it is not supported."; + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } auto trans_mode = iter->second; int size = GetSizeByDataType(args.dst_data_type); if (size <= 0) { - GELOGE(PARAM_INVALID, "Failed to calc size from data type %s", - TypeUtils::DataTypeToSerialString(args.dst_data_type).c_str()); + std::string error = "Failed to calc size from data type" + + FmtToStr(TypeUtils::DataTypeToSerialString(args.dst_data_type)) + ", it is not supported."; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return PARAM_INVALID; } if (args.src_data_size > static_cast(SIZE_MAX / size)) { - GELOGE(PARAM_INVALID, "args.src_data_size %zu or data type size %d too big.", args.src_data_size, size); + std::string error = + "args.src_data_size" + FmtToStr(args.src_data_size) + " or data type size" + FmtToStr(size) + " is too big"; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return PARAM_INVALID; } size_t total_size = static_cast(args.src_data_size * size); @@ -154,9 +159,11 @@ Status DataTypeTransfer::TransDataType(const CastArgs &args, TransResult &result } if (CastKernel(args, dst.get(), args.src_data_size, trans_mode) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to cast data from %s to %s, data size %zu", - TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), - TypeUtils::DataTypeToSerialString(args.dst_data_type).c_str(), args.src_data_size); + std::string error = "Failed to cast data from datatype " + + FmtToStr(TypeUtils::DataTypeToSerialString(args.src_data_type)) + " to " + + FmtToStr(TypeUtils::DataTypeToSerialString(args.dst_data_type)) + ", data size is " + + 
FmtToStr(std::to_string(args.src_data_size)); + GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error.c_str()); return INTERNAL_ERROR; } result.data = dst; diff --git a/src/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc b/src/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc index 40dc749d..e1a018be 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc @@ -22,6 +22,7 @@ #include "common/formats/utils/formats_definitions.h" #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "graph/utils/type_utils.h" namespace ge { @@ -35,14 +36,16 @@ Status CheckArgsForC1hwncoc0ToHwcn(const TransArgs &args) { auto src_shape = args.src_shape; auto dst_shape = args.dst_shape; if (args.src_format != FORMAT_C1HWNCoC0 || args.dst_format != FORMAT_HWCN) { - GELOGE(UNSUPPORTED, "Does not support trans format from %s to %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str()); + std::string error = "Dose not support trans format from " + + FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + + FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } if (!CheckDataTypeSupported(args.src_data_type)) { - GELOGE(UNSUPPORTED, "Failed to trans shape from NC1HWNCoC0 to HWCN, invalid data type %s", - TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); + std::string error = "Failed to trans shape from NC1HWNCoC0 to HWCN, invalid data type" + + FmtToStr(TypeUtils::DataTypeToSerialString(args.src_data_type)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } if (!CheckShapeValid(src_shape, kC1hwncoc0DimsNum)) { @@ -58,8 +61,9 @@ Status 
CheckArgsForC1hwncoc0ToHwcn(const TransArgs &args) { src_shape.at(kC1hwncoc0H) != dst_shape.at(kHwcnH) || src_shape.at(kC1hwncoc0W) != dst_shape.at(kHwcnW) || src_shape.at(kC1hwncoc0N) != dst_shape.at(kHwcnN) || src_shape.at(kC1hwncoc0Co) != cube_size || src_shape.at(kC1hwncoc0C0) != cube_size) { - GELOGE(PARAM_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", - ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); + std::string error = "Failed to check relationship between src and dst shape, src shape" + + FmtToStr(ShapeToString(src_shape)) + ", dst shape" + FmtToStr(ShapeToString(dst_shape)); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return PARAM_INVALID; } diff --git a/src/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc b/src/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc index 76d8696a..28914c30 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc @@ -149,11 +149,7 @@ Status FormatTransferDhwcnFractalZ3D::TransFormat(const TransArgs &args, TransRe if (ret != SUCCESS) { return ret; } - if (!args.dst_shape.empty() && args.dst_shape != expect_shape) { - GELOGE(PARAM_INVALID, "Failed to trans format from %s to %s, the dst shape %s is invalid, expect %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.dst_shape).c_str(), - ShapeToString(expect_shape).c_str()); + if (!IsTransShapeDstCorrect(args, expect_shape)) { return PARAM_INVALID; } diff --git a/src/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc b/src/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc index 9de2e3a0..3a9f16d3 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc +++ 
b/src/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc @@ -150,11 +150,7 @@ Status FormatTransferDhwncFractalZ3DTranspose::TransFormat(const TransArgs &args if (ret != SUCCESS) { return ret; } - if (!args.dst_shape.empty() && args.dst_shape != expect_shape) { - GELOGE(PARAM_INVALID, "Failed to trans format from %s to %s, the dst shape %s is invalid, expect %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.dst_shape).c_str(), - ShapeToString(expect_shape).c_str()); + if (!IsTransShapeDstCorrect(args, expect_shape)) { return PARAM_INVALID; } diff --git a/src/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc b/src/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc index 65798f29..69a928fc 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc @@ -22,6 +22,7 @@ #include "common/formats/utils/formats_definitions.h" #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "graph/utils/type_utils.h" namespace ge { @@ -39,8 +40,9 @@ bool CheckShape(Format format, const ShapeVector &shape) { case FORMAT_NHWC: return CheckShapeValid(shape, kDimSize4D); default: - GELOGE(PARAM_INVALID, "Trans format between %s and FORMAT_FRACTAL_NZ is not supported.", - TypeUtils::FormatToSerialString(format).c_str()); + std::string error = "Trans format between " + FmtToStr(TypeUtils::FormatToSerialString(format)) + + " and FORMAT_FRACTAL_NZ is not supported."; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return false; } } @@ -103,11 +105,7 @@ Status CheckShapeRelation(const TransArgs &args, ShapeVector &hw_shape) { ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); return INTERNAL_ERROR; } 
- if (args.src_shape != expect_src_shape) { - GELOGE(PARAM_INVALID, "Failed to trans format from %s to %s, invalid relationship between src shape %s and dst %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(), - ShapeToString(args.dst_shape).c_str()); + if (!IsTransShapeSrcCorrect(args, expect_src_shape)) { return PARAM_INVALID; } return SUCCESS; @@ -279,11 +277,7 @@ Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult & if (ret != SUCCESS) { return ret; } - if (args.dst_shape != expect_shape) { - GELOGE(PARAM_INVALID, "Failed to trans format from %s to %s, the dst shape %s is invalid, expect %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.dst_shape).c_str(), - ShapeToString(expect_shape).c_str()); + if (!IsTransShapeDstCorrect(args, expect_shape)) { return PARAM_INVALID; } return TransFormatFromNdToFracNz(args, result, hw_shape); diff --git a/src/ge/common/formats/format_transfers/format_transfer_fractal_z.cc b/src/ge/common/formats/format_transfers/format_transfer_fractal_z.cc index f2ec29da..baf13b98 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_fractal_z.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_fractal_z.cc @@ -23,6 +23,7 @@ #include "common/formats/utils/formats_definitions.h" #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "graph/utils/type_utils.h" namespace ge { @@ -159,8 +160,9 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) { ret = memset_s(dst.get() + offset, static_cast(protected_size), 0, static_cast(size)); } else { if (protected_size < size) { - GELOGE(INTERNAL_ERROR, "Failed to operate the dst memory, protected_size is %ld and size is %ld", - 
protected_size, size); + std::string error = "Failed to operate the dst memory, protected_size is " + FmtToStr(protected_size) + + " and size is " + FmtToStr(size); + GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error.c_str()); return INTERNAL_ERROR; } char *dst_data = reinterpret_cast(dst.get() + offset); @@ -345,11 +347,7 @@ Status FormatTransferFractalZ::TransFormat(const TransArgs &args, TransResult &r if (ret != SUCCESS) { return ret; } - if (!args.dst_shape.empty() && args.dst_shape != expect_shape) { - GELOGE(PARAM_INVALID, "Failed to trans format from %s to %s, the dst shape %s is invalid, expect %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.dst_shape).c_str(), - ShapeToString(expect_shape).c_str()); + if (!IsTransShapeDstCorrect(args, expect_shape)) { return PARAM_INVALID; } diff --git a/src/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc b/src/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc index d5507765..239fb497 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc @@ -22,6 +22,7 @@ #include "common/formats/utils/formats_definitions.h" #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "graph/utils/type_utils.h" namespace ge { @@ -39,8 +40,9 @@ bool CheckShape(Format format, const ShapeVector &shape) { case FORMAT_NHWC: return CheckShapeValid(shape, kDimSize4D); default: - GELOGE(PARAM_INVALID, "Not support trans format between %s and FORMAT_FRACTAL_ZZ.", - TypeUtils::FormatToSerialString(format).c_str()); + std::string error = "Trans format between " + FmtToStr(TypeUtils::FormatToSerialString(format)) + + " and FORMAT_FRACTAL_ZZ is not supported."; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return false; } } @@ 
-103,12 +105,7 @@ Status CheckShapeRelation(const TransArgs &args, ShapeVector &hw_shape) { ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str()); return INTERNAL_ERROR; } - if (args.src_shape != expect_src_shape) { - GELOGE(PARAM_INVALID, - "Failed to trans format from %s to %s, invalid relationship between src shape %s and dst shape %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(), - ShapeToString(args.dst_shape).c_str()); + if (!IsTransShapeSrcCorrect(args, expect_src_shape)) { return PARAM_INVALID; } return SUCCESS; @@ -289,11 +286,7 @@ Status FormatTransferFractalZz::TransFormat(const TransArgs &args, TransResult & if (ret != SUCCESS) { return ret; } - if (args.dst_shape != expect_shape) { - GELOGE(PARAM_INVALID, "Failed to trans format from %s to %s, the dst shape %s is invalid, expect %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.dst_shape).c_str(), - ShapeToString(expect_shape).c_str()); + if (!IsTransShapeDstCorrect(args, expect_shape)) { return PARAM_INVALID; } return TransFormatFromNdToFracZz(args, result, hw_shape); diff --git a/src/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc b/src/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc index b0eebcfa..6cbd6a5a 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc @@ -22,6 +22,7 @@ #include "common/formats/utils/formats_definitions.h" #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "graph/utils/type_utils.h" namespace ge { @@ -33,9 +34,10 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) { auto src_shape = 
args.src_shape; auto dst_shape = args.dst_shape; if (args.src_format != FORMAT_FRACTAL_Z || args.dst_format != FORMAT_HWCN) { - GELOGE(UNSUPPORTED, "Does not support trans format from %s to %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str()); + std::string error = "Dose not support trans format from " + + FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + + FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } if (!CheckDataTypeSupported(args.src_data_type)) { @@ -59,8 +61,9 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) { int64_t n0 = Ceil(dst_shape.at(kHwcnN), static_cast(kNiSize)); if (src_shape.at(kFracZHWC1) != dst_shape.at(kHwcnH) * dst_shape.at(kHwcnW) * c1 || src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) { - GELOGE(PARAM_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s", - ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str()); + std::string error = "Failed to check relationship between src shape" + FmtToStr(ShapeToString(src_shape)) + + " and dst shape" + FmtToStr(ShapeToString(dst_shape)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return PARAM_INVALID; } diff --git a/src/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc b/src/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc index 9f8d9e39..88e07ea9 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc @@ -22,6 +22,7 @@ #include "common/formats/utils/formats_definitions.h" #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "graph/utils/type_utils.h" namespace ge { @@ -33,9 
+34,10 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) { auto src_shape = args.src_shape; auto dst_shape = args.dst_shape; if (args.src_format != FORMAT_FRACTAL_Z || args.dst_format != FORMAT_NCHW) { - GELOGE(UNSUPPORTED, "Does not support trans format from %s to %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str()); + std::string error = "Dose not support trans format from " + + FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + + FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } if (!CheckDataTypeSupported(args.src_data_type)) { diff --git a/src/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc b/src/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc index 9a1e5f3b..9f1b627d 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc @@ -22,6 +22,7 @@ #include "common/formats/utils/formats_definitions.h" #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "graph/utils/type_utils.h" namespace ge { @@ -33,9 +34,10 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) { auto src_shape = args.src_shape; auto dst_shape = args.dst_shape; if (args.src_format != FORMAT_FRACTAL_Z || args.dst_format != FORMAT_NHWC) { - GELOGE(UNSUPPORTED, "Does not support trans format from %s to %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str()); + std::string error = "Dose not support trans format from " + + FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + + FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; 
} if (!CheckDataTypeSupported(args.src_data_type)) { diff --git a/src/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc b/src/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc index 7101256a..1c5ec0f9 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc @@ -22,6 +22,7 @@ #include "common/formats/utils/formats_definitions.h" #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "graph/utils/type_utils.h" namespace ge { @@ -50,9 +51,10 @@ Status TransShapeHwcnToC1hwncoc0(const DataType &data_type, const std::vector &src_shape, DataType d Status CheckArgsForNchwToNc1hwc0(const TransArgs &args) { if (args.src_format != FORMAT_NCHW || args.dst_format != FORMAT_NC1HWC0) { - GELOGE(UNSUPPORTED, "Does not support trans format from %s to %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str()); + std::string error = "Dose not support trans format from " + + FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + + FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } std::vector expect_5d_shape; diff --git a/src/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc b/src/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc index a5be94ff..c5b2adf7 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc @@ -22,6 +22,7 @@ #include "common/formats/utils/formats_definitions.h" #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "graph/utils/type_utils.h" 
namespace ge { @@ -51,9 +52,10 @@ Status TransShapeNhwcToNc1hwc0(const std::vector &src_shape, DataType d Status CheckArgsForNhwcToNc1hwc0(const TransArgs &args) { if (args.src_format != FORMAT_NHWC || args.dst_format != FORMAT_NC1HWC0) { - GELOGE(UNSUPPORTED, "Does not support trans format from %s to %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str()); + std::string error = "Dose not support trans format from " + + FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + + FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } if (!CheckDataTypeSupported(args.src_data_type)) { diff --git a/src/ge/common/formats/format_transfers/format_transfer_transpose.cc b/src/ge/common/formats/format_transfers/format_transfer_transpose.cc index 3be4d67d..ca88371f 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_transpose.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_transpose.cc @@ -48,28 +48,31 @@ std::map>> perm_args{ bool IsShapeArgValid(const std::vector &src_shape, const std::vector &perm_arg) { if (src_shape.empty()) { + std::string error = "Failed to transpose, empty src shape"; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); GELOGE(PARAM_INVALID, "Failed to transpose, empty src shape"); return false; } for (auto dim : src_shape) { if (dim < 0) { - GELOGE(PARAM_INVALID, "Failed to transpose, negative dim in src shape %s", ShapeToString(src_shape).c_str()); + std::string error = "Failed to transpose, negative dim in src shape " + FmtToStr(ShapeToString(src_shape)); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return false; } } if (perm_arg.size() != src_shape.size()) { - GELOGE(PARAM_INVALID, - "Failed to transpose, the size of src shape(%zu) and" - " perm arg(%zu) are different", - src_shape.size(), perm_arg.size()); + std::string error = "Failed 
to transpose, the size of src shape" + FmtToStr(src_shape.size()) + " and perm arg" + + FmtToStr(perm_arg.size()) + " are different"; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return false; } std::vector exists(perm_arg.size()); for (auto perm : perm_arg) { if (perm < 0 || static_cast(perm) >= perm_arg.size() || ++exists[perm] > 1) { - GELOGE(PARAM_INVALID, "Failed to transpose, duplicated perm arg %ld, perm arg %s", perm, - JoinToString(perm_arg).c_str()); + std::string error = + "Failed to transpose, duplicated perm arg " + FmtToStr(perm) + ", perm arg " + FmtToStr(JoinToString(perm_arg)); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return false; } } @@ -192,9 +195,10 @@ Status TransposeWithShapeCheck(const uint8_t *data, const std::vector & } auto expected_shape = TransShapeByPerm(src_shape, perm_arg); if (dst_shape != expected_shape) { - GELOGE(PARAM_INVALID, "Failed to trans axis for perm_arg %s, invalid dst shape %s, expect %s", - ShapeToString(perm_arg).c_str(), ShapeToString(dst_shape).c_str(), ShapeToString(expected_shape).c_str()); - return PARAM_INVALID; + std::string error = "Failed to trans axis for perm_arg" + FmtToStr(ShapeToString(perm_arg)) + + ", invalid dst shape" + FmtToStr(ShapeToString(dst_shape)) + ", expect" + + FmtToStr(ShapeToString(expected_shape)); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); } return Transpose(data, src_shape, src_data_type, perm_arg, result); @@ -203,14 +207,18 @@ Status TransposeWithShapeCheck(const uint8_t *data, const std::vector & Status GetPermByForamt(Format src_format, Format dst_format, std::vector &perm) { auto dst_iter = perm_args.find(src_format); if (dst_iter == perm_args.end()) { - GELOGE(UNSUPPORTED, "Failed to trans shape, do not support transpose from format %s to %s", - TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str()); + std::string error = "Failed to trans shape, do not support transpose from format " + 
+ FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " + + FmtToStr(TypeUtils::FormatToSerialString(dst_format)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } auto iter = dst_iter->second.find(dst_format); if (iter == dst_iter->second.end()) { - GELOGE(UNSUPPORTED, "Failed to trans shape, do not support transpose from format %s to %s", - TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str()); + std::string error = "Failed to trans shape, do not support transpose from format " + + FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " + + FmtToStr(TypeUtils::FormatToSerialString(dst_format)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } perm = iter->second; @@ -223,11 +231,7 @@ Status FormatTransferTranspose::TransFormat(const TransArgs &args, TransResult & if (ret != SUCCESS) { return ret; } - if (args.dst_shape != expected_shape) { - GELOGE(PARAM_INVALID, "Failed to trans format from %s to %s, invalid dst shape %s, expect %s", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.dst_shape).c_str(), - ShapeToString(expected_shape).c_str()); + if (!IsTransShapeDstCorrect(args, expected_shape)) { return PARAM_INVALID; } diff --git a/src/ge/common/formats/formats.cc b/src/ge/common/formats/formats.cc index d01d055b..a8373706 100644 --- a/src/ge/common/formats/formats.cc +++ b/src/ge/common/formats/formats.cc @@ -26,6 +26,7 @@ #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "framework/common/ge_inner_error_codes.h" #include "graph/utils/type_utils.h" @@ -34,9 +35,10 @@ namespace formats { GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransFormat(const TransArgs &args, TransResult &result) { auto transfer = BuildFormatTransfer(args); if (transfer 
== nullptr) { - GELOGE(UNSUPPORTED, "Failed to trans data from format %s to %s, unsupport now", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str()); + std::string error = "Failed to trans data from format " + + FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + + FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } @@ -58,9 +60,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransShape(Format src_form args.dst_format = dst_format; auto transfer = BuildFormatTransfer(args); if (transfer == nullptr) { - GELOGE(UNSUPPORTED, "Failed to trans data from format %s to %s, unsupport now", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str()); + std::string error = "Failed to trans data from format " + + FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + + FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } @@ -70,9 +73,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransShape(Format src_form GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransDataType(const CastArgs &args, TransResult &result) { auto transfer = BuildDataTypeTransfer(args); if (transfer == nullptr) { - GELOGE(UNSUPPORTED, "Failed to trans data from datatype %s to %s, unsupport now", - TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), - TypeUtils::DataTypeToSerialString(args.dst_data_type).c_str()); + std::string error = "Failed to trans data from datatype " + + FmtToStr(TypeUtils::DataTypeToSerialString(args.src_data_type)) + " to " + + FmtToStr(TypeUtils::DataTypeToSerialString(args.dst_data_type)); + GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); return UNSUPPORTED; } diff --git 
a/src/ge/common/formats/utils/formats_trans_utils.cc b/src/ge/common/formats/utils/formats_trans_utils.cc index 23da0f74..06bc49a3 100644 --- a/src/ge/common/formats/utils/formats_trans_utils.cc +++ b/src/ge/common/formats/utils/formats_trans_utils.cc @@ -20,6 +20,7 @@ #include "common/formats/utils/formats_definitions.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" #include "framework/common/ge_inner_error_codes.h" #include "graph/utils/type_utils.h" @@ -29,8 +30,9 @@ int64_t GetCubeSizeByDataType(DataType data_type) { // Current cube does not support 4 bytes and longer data auto size = GetSizeByDataType(data_type); if (size <= 0) { - GELOGE(PARAM_INVALID, "Failed to get cube size, the data type %s is invalid", - TypeUtils::DataTypeToSerialString(data_type).c_str()); + std::string error = "Failed to get cube size, the data type " + + FmtToStr(TypeUtils::DataTypeToSerialString(data_type)) + " is invalid"; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return -1; } else if (size == 1) { return kCubeSize * 2; // 32 bytes cube size @@ -57,7 +59,8 @@ int64_t GetItemNumByShape(const std::vector &shape) { bool CheckShapeValid(const std::vector &shape, const int64_t expect_dims) { if (expect_dims <= 0 || shape.size() != static_cast(expect_dims)) { - GELOGE(PARAM_INVALID, "Invalid shape, dims num %zu, expect %ld", shape.size(), expect_dims); + std::string error = "Invalid shape, dims num " + FmtToStr(shape.size()) + ", expect " + FmtToStr(expect_dims); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return false; } return IsShapeValid(shape); @@ -70,11 +73,13 @@ bool IsShapeValid(const std::vector &shape) { int64_t num = 1; for (auto dim : shape) { if (dim < 0) { - GELOGE(PARAM_INVALID, "Invalid negative dim in the shape %s", ShapeToString(shape).c_str()); + std::string error = "Invalid negative dims in the shape " + FmtToStr(ShapeToString(shape)); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return 
false; } if (dim != 0 && kShapeItemNumMAX / dim < num) { - GELOGE(PARAM_INVALID, "Shape overflow, the total count should be less than %ld!", kShapeItemNumMAX); + std::string error = "Shape overflow, the total count should be less than " + FmtToStr(kShapeItemNumMAX); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); return false; } num *= dim; @@ -94,5 +99,29 @@ bool IsShapeEqual(const GeShape &src, const GeShape &dst) { } return true; } + +bool IsTransShapeSrcCorrect(const TransArgs &args, std::vector &expect_shape) { + if (args.src_shape != expect_shape) { + std::string error = "Failed to trans format from" + FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + + " to " + FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + + ", invalid relationship between src shape " + FmtToStr(ShapeToString(args.src_shape)) + + " and dst " + FmtToStr(ShapeToString(args.dst_shape)); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + return false; + } + return true; +} + +bool IsTransShapeDstCorrect(const TransArgs &args, std::vector &expect_shape) { + if (!args.dst_shape.empty() && args.dst_shape != expect_shape) { + std::string error = "Failed to trans format from " + FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + + " to " + FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + ", the dst shape" + + FmtToStr(ShapeToString(args.dst_shape)) + " is invalid, expect" + + FmtToStr(ShapeToString(expect_shape)); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str()); + return false; + } + return true; +} } // namespace formats } // namespace ge diff --git a/src/ge/common/formats/utils/formats_trans_utils.h b/src/ge/common/formats/utils/formats_trans_utils.h index 8b6f0604..6525a1e3 100644 --- a/src/ge/common/formats/utils/formats_trans_utils.h +++ b/src/ge/common/formats/utils/formats_trans_utils.h @@ -23,6 +23,7 @@ #include #include "external/graph/types.h" #include "graph/ge_tensor.h" +#include 
"register/register_format_transfer.h" namespace ge { namespace formats { @@ -61,6 +62,10 @@ bool IsShapeValid(const std::vector &shape); bool IsShapeEqual(const GeShape &src, const GeShape &dst); +bool IsTransShapeSrcCorrect(const TransArgs &args, std::vector &expect_shape); + +bool IsTransShapeDstCorrect(const TransArgs &args, std::vector &expect_shape); + template T Ceil(T n1, T n2) { if (n1 == 0) { diff --git a/src/ge/common/ge_common.mk b/src/ge/common/ge_common.mk index 45ee1057..28536195 100644 --- a/src/ge/common/ge_common.mk +++ b/src/ge/common/ge_common.mk @@ -61,17 +61,18 @@ GE_COMMON_LOCAL_C_INCLUDES := \ proto/tensorflow/types.proto \ proto/tensorflow/resource_handle.proto \ $(TOPDIR)inc \ + $(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ $(TOPDIR)inc/external \ - $(TOPDIR)inc/external/graph \ - $(TOPDIR)inc/framework \ - $(TOPDIR)inc/common/util \ + $(TOPDIR)metadef/inc/external \ + $(TOPDIR)graphengine/inc/external \ + $(TOPDIR)metadef/inc/external/graph \ + $(TOPDIR)graphengine/inc/framework \ + $(TOPDIR)metadef/inc/common/util \ $(TOPDIR)libc_sec/include \ $(TOPDIR)third_party/json/include \ $(TOPDIR)third_party/protobuf/include \ $(TOPDIR)third_party/openssl/include/x86/include \ - $(TOPDIR)framework/domi \ - $(TOPDIR)framework/domi/common \ - $(TOPDIR)framework/domi/common/op \ $(TOPDIR)graphengine/ge \ $(TOPDIR)graphengine/ge/common \ $(TOPDIR)graphengine/ge/common/op \ diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc index 692e50b0..dfdb1196 100644 --- a/src/ge/common/profiling/profiling_manager.cc +++ b/src/ge/common/profiling/profiling_manager.cc @@ -21,6 +21,7 @@ #include "framework/common/string_util.h" #include "graph/ge_context.h" #include "runtime/base.h" +#include "graph/load/new_model_manager/davinci_model.h" namespace { const char *const kJobID = "jobID"; @@ -39,10 +40,12 @@ const std::string kConfigNumsdev = "devNums"; const std::string kConfigDevIdList = "devIdList"; const 
std::string kProfStart = "prof_start"; const std::string kProfStop = "prof_stop"; +const std::string kProfModelSubscribe = "prof_model_subscribe"; +const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; } // namespace namespace ge { -ProfilingManager::ProfilingManager() {} +ProfilingManager::ProfilingManager() : subscribe_count_(0) {} ProfilingManager::~ProfilingManager() {} @@ -54,6 +57,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) { #ifdef DAVINCI_SUPPORT_PROFILING vector().swap(device_id_); + subscribe_count_ = 0; job_id_ = options.job_id; GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str()); @@ -380,7 +384,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( - const std::vector &task_desc_info, const int32_t &device_id) { + uint32_t model_id, const std::vector &task_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); if (reporter == nullptr) { @@ -400,6 +404,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin .append(std::to_string(task_id)) .append(" ") .append(std::to_string(stream_id)) + .append(" ") + .append(std::to_string(model_id)) .append("\n")); Msprof::Engine::ReporterData reporter_data{}; @@ -424,7 +430,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( - const std::vector &compute_graph_desc_info, const int32_t &device_id) { + uint32_t model_id, const std::vector &compute_graph_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING Msprof::Engine::Reporter *reporter = 
PluginImpl::GetPluginReporter(); GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return;); @@ -482,6 +488,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin data.append("\""); } + data.append(" model_id:").append(std::to_string(model_id)); + data.append("\n"); Msprof::Engine::ReporterData reporter_data{}; @@ -536,7 +544,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( - const std::vector &task_desc_info, const std::vector &compute_graph_desc_info) { + uint32_t model_id, const std::vector &task_desc_info, + const std::vector &compute_graph_desc_info, bool check_device) { #ifdef DAVINCI_SUPPORT_PROFILING int32_t logic_device_id = 0; rtError_t rt_ret = rtGetDevice(&logic_device_id); @@ -545,7 +554,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr return; } GELOGI("current logic_device_id:%d", logic_device_id); - if (!is_acl_api_mode_) { + if (check_device) { auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); if (ret == device_id_.end()) { GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); @@ -553,9 +562,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr } } GELOGI("start ProfilingTaskDescInfo."); - ProfilingTaskDescInfo(task_desc_info, logic_device_id); + ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); GELOGI("start ProfilingGraphDescInfo."); - ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id); + ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); GELOGI("Report profiling data for GE end."); #endif } @@ -573,6 +582,102 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetP return module; } +void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type, 
uint32_t device_id, uint64_t module) { +#ifdef DAVINCI_SUPPORT_PROFILING + if (prof_type == kProfModelSubscribe) { + if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) { + subs_dev_module_[device_id].subscribe_count++; + } else { + DeviceSubsInfo dev_info; + dev_info.module = module; + dev_info.subscribe_count = 1; + subs_dev_module_[device_id] = dev_info; + } + } else if (prof_type == kProfModelUnsubscribe) { + if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) { + if (subs_dev_module_[device_id].subscribe_count > 0) { + subs_dev_module_[device_id].subscribe_count--; + } + } + } else { + GELOGI("No need to update device_id module map."); + } +#endif +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfModelSubscribe(uint64_t module, + void *model) { +#ifdef DAVINCI_SUPPORT_PROFILING + std::lock_guard lock(mutex_); + uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK; + if ((subscribe_count_ == 0) && (model_load_mask == PROF_MODEL_LOAD_MASK)) { + // register framework to profiling + int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_); + if (result != SUCCESS) { + GELOGE(FAILED, "Register profiling engine failed."); + return FAILED; + } + GELOGI("Prof subscribe: model load profiling on."); + } + subscribe_count_++; + + auto davinci_model = static_cast(model); + int32_t device_num = 1; + uint32_t device[1]; + device[0] = davinci_model->GetDeviceId(); + rtError_t rt_ret = rtProfilerStart(module, device_num, device); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(FAILED, "Runtime profiler start failed."); + return FAILED; + } + UpdateSubscribeDeviceModuleMap(kProfModelSubscribe, device[0], module); + + // Report profiling data + Status p_ret = davinci_model->ReportProfilingData(false); + if (p_ret != SUCCESS) { + GELOGE(p_ret, "Report profiling data failed."); + return p_ret; + } +#endif + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status 
ProfilingManager::ProfModelUnsubscribe(void *model) { +#ifdef DAVINCI_SUPPORT_PROFILING + std::lock_guard lock(mutex_); + if (subscribe_count_ == 0) { + GELOGW("The profiler has not been subscribed, you do not need to cannel the subscription."); + return SUCCESS; + } + + auto davinci_model = static_cast(model); + int32_t dev_num = 1; + uint32_t device[1]; + device[0] = davinci_model->GetDeviceId(); + auto iter = subs_dev_module_.find(device[0]); + if (iter != subs_dev_module_.end()) { + if (subs_dev_module_[device[0]].subscribe_count == 1) { + rtError_t rt_ret = rtProfilerStop(subs_dev_module_[device[0]].module, dev_num, device); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(FAILED, "Runtime profiler stop failed."); + return FAILED; + } + } + UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], subs_dev_module_[device[0]].module); + } + + subscribe_count_--; + if (subscribe_count_ == 0) { + int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE); + if (ret != SUCCESS) { + GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret); + return ret; + } + } +#endif + return SUCCESS; +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfInit(uint64_t module) { #ifdef DAVINCI_SUPPORT_PROFILING std::lock_guard lock(mutex_); @@ -740,6 +845,7 @@ ProfilingManager::ProfStartProfiling(uint64_t module, const std::map(device_list[i]); } GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); + rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { GELOGE(FAILED, "Runtime profiler config proc failed."); diff --git a/src/ge/common/profiling/profiling_manager.h b/src/ge/common/profiling/profiling_manager.h index a030efd3..647b6f1a 100644 --- a/src/ge/common/profiling/profiling_manager.h +++ b/src/ge/common/profiling/profiling_manager.h @@ -39,6 +39,10 @@ namespace { const std::string GE_PROFILING_MODULE = "Framework"; } // namespace namespace ge { +struct DeviceSubsInfo { 
+ uint64_t module; + uint32_t subscribe_count; +}; // register Plugin class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf { public: @@ -73,6 +77,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { ge::Status InitFromOptions(const Options &options); ge::Status InitFromAclCfg(const std::string &config); ge::Status StartProfiling(int32_t iter, int32_t device_id); + void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); + ge::Status ProfModelSubscribe(uint64_t module, void *model); + ge::Status ProfModelUnsubscribe(void *model); ge::Status ProfInit(uint64_t module); ge::Status ProfFinalize(); ge::Status ProfStartProfiling(uint64_t module, const std::map &config_para); @@ -84,13 +91,15 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { bool ProfilingModelLoadOn() const { return is_load_profiling_; } bool ProfilingModelExecuteOn() const; bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern + bool IsAclApiMode() const { return is_acl_api_mode_; } int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } - void ReportProfilingData(const std::vector &task_desc_info, - const std::vector &compute_graph_desc_info); + void ReportProfilingData(uint32_t model_id, const std::vector &task_desc_info, + const std::vector &compute_graph_desc_info, bool check_device); void Report(const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter, Msprof::Engine::ReporterData &reporter_data); - void ProfilingTaskDescInfo(const std::vector &task_desc_info, const int32_t &device_id); - void ProfilingGraphDescInfo(const std::vector &compute_graph_desc_info, + void ProfilingTaskDescInfo(uint32_t model_id, const std::vector &task_desc_info, + const int32_t &device_id); + void ProfilingGraphDescInfo(uint32_t model_id, const std::vector &compute_graph_desc_info, const 
int32_t &device_id); void SetProfilingConfig(const string &profiling_cfg); vector GetProfilingDeviceId() const { return device_id_; } @@ -121,7 +130,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { string system_trace_conf_; string task_trace_conf_; const ProfilingEngineImpl engine_; - map device_id_module_map_; // key: device_id, value: profiling on module + map device_id_module_map_; // key: device_id, value: profiling on module + map subs_dev_module_; // key: device_id, value: profiling on module + uint32_t subscribe_count_; std::mutex mutex_; }; } // namespace ge diff --git a/src/ge/common/types.cc b/src/ge/common/types.cc index 220923c5..0a7c03c9 100644 --- a/src/ge/common/types.cc +++ b/src/ge/common/types.cc @@ -54,6 +54,7 @@ const std::map PROFILE_COMPONENT_MAP{ {"runtime", RTS_PROFILE}, }; const std::string PROFILE_CONFIG = "config"; +const std::string PROFILE_MODEL_ID = "modelId"; REGISTER_OPTYPE_DEFINE(DATA, "Data"); REGISTER_OPTYPE_DEFINE(AIPPDATA, "AippData"); diff --git a/src/ge/executor/ge_executor.cc b/src/ge/executor/ge_executor.cc index e642f41c..b1675f52 100644 --- a/src/ge/executor/ge_executor.cc +++ b/src/ge/executor/ge_executor.cc @@ -1060,6 +1060,19 @@ Status GeExecutor::ReleaseSingleOpResource(void *stream) { return SingleOpManager::GetInstance().ReleaseResource(stream); } +Status GeExecutor::GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) { + auto model_manager = ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + auto davinci_model = model_manager->GetModel(model_id); + if (davinci_model == nullptr) { + GELOGE(FAILED, "Model id: %d is invaild or model is not loaded.", model_id); + return FAILED; + } + + device_id = davinci_model->GetDeviceId(); + return SUCCESS; +} + Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { std::vector> batch_info; int32_t dynamic_type = static_cast(FIXED); diff --git a/src/ge/executor/module.mk b/src/ge/executor/module.mk index 
eaa611d2..4244ea92 100644 --- a/src/ge/executor/module.mk +++ b/src/ge/executor/module.mk @@ -72,9 +72,13 @@ local_ge_executor_c_include := \ proto/task.proto \ proto/om.proto \ $(TOPDIR)inc/external \ - $(TOPDIR)inc/external/graph \ - $(TOPDIR)inc/framework \ + $(TOPDIR)metadef/inc/external \ + $(TOPDIR)graphengine/inc/external \ + $(TOPDIR)metadef/inc/external/graph \ + $(TOPDIR)graphengine/inc/framework \ $(TOPDIR)inc \ + $(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ $(LOCAL_PATH)/../ \ $(TOPDIR)graphengine/ge \ $(TOPDIR)libc_sec/include \ diff --git a/src/ge/ge_inference.mk b/src/ge/ge_inference.mk index 81928c1b..3b1dba01 100644 --- a/src/ge/ge_inference.mk +++ b/src/ge/ge_inference.mk @@ -287,11 +287,15 @@ COMMON_LOCAL_C_INCLUDES := \ proto/tensorflow/versions.proto \ $(LOCAL_PATH) ./ \ $(TOPDIR)inc \ + $(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ $(TOPDIR)inc/external \ - $(TOPDIR)inc/external/graph \ - $(TOPDIR)inc/framework \ - $(TOPDIR)inc/framework/common \ - $(TOPDIR)inc/common \ + $(TOPDIR)metadef/inc/external \ + $(TOPDIR)graphengine/inc/external \ + $(TOPDIR)metadef/inc/external/graph \ + $(TOPDIR)graphengine/inc/framework \ + $(TOPDIR)graphengine/inc/framework/common \ + $(TOPDIR)metadef/inc/common \ $(TOPDIR)inc/runtime \ $(TOPDIR)libc_sec/include \ $(TOPDIR)ops/built-in/op_proto/inc \ @@ -301,7 +305,7 @@ COMMON_LOCAL_C_INCLUDES := \ third_party/opencv/include \ ANALYZER_LOCAL_INCLUDES := \ - $(TOPDIR)framework/domi/analyzer \ + $(TOPDIR)graphengine/ge/analyzer \ NEW_OMG_HOST_SRC_FILES := \ graph/preprocess/insert_op/util_insert_aipp_op.cc \ @@ -341,15 +345,18 @@ DEVICE_LOCAL_C_INCLUDES := \ proto/tensorflow/versions.proto \ $(LOCAL_PATH) ./ \ $(TOPDIR)inc \ + $(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ $(TOPDIR)libc_sec/include \ $(TOPDIR)inc/external \ - $(TOPDIR)inc/external/graph \ - $(TOPDIR)inc/common/util \ - $(TOPDIR)inc/framework \ - $(TOPDIR)inc/framework/common \ + $(TOPDIR)metadef/inc/external \ + 
$(TOPDIR)graphengine/inc/external \ + $(TOPDIR)metadef/inc/external/graph \ + $(TOPDIR)metadef/inc/common/util \ + $(TOPDIR)graphengine/inc/framework \ + $(TOPDIR)graphengine/inc/framework/common \ $(TOPDIR)inc/runtime \ $(TOPDIR)ops/built-in/op_proto/inc \ - $(TOPDIR)framework/domi \ $(TOPDIR)graphengine/ge \ $(TOPDIR)toolchain/ide/ide-daemon/external \ third_party/json/include \ diff --git a/src/ge/ge_local_engine/module.mk b/src/ge/ge_local_engine/module.mk index 0f95d352..a3c571d9 100644 --- a/src/ge/ge_local_engine/module.mk +++ b/src/ge/ge_local_engine/module.mk @@ -17,12 +17,15 @@ ops_kernel_builder_src_files := ops_kernel_store/ge_local_ops_kernel_builder.cc local_lib_inc_path := proto/task.proto \ ${LOCAL_PATH} \ ${TOPDIR}inc \ + ${TOPDIR}metadef/inc \ + ${TOPDIR}graphengine/inc \ ${TOPDIR}inc/external \ - ${TOPDIR}inc/external/graph \ + ${TOPDIR}metadef/inc/external \ + ${TOPDIR}graphengine/inc/external \ + ${TOPDIR}metadef/inc/external/graph \ $(TOPDIR)libc_sec/include \ ${TOPDIR}third_party/protobuf/include \ - ${TOPDIR}inc/framework \ - $(TOPDIR)framework/domi \ + ${TOPDIR}graphengine/inc/framework \ $(TOPDIR)graphengine/ge \ #compiler for host diff --git a/src/ge/ge_runner.mk b/src/ge/ge_runner.mk index 7d1058f4..c9be823f 100644 --- a/src/ge/ge_runner.mk +++ b/src/ge/ge_runner.mk @@ -300,6 +300,8 @@ LIBGE_LOCAL_SRC_FILES := \ hybrid/hybrid_davinci_model.cc \ executor/ge_executor.cc \ analyzer/analyzer.cc \ + ir_build/ge_ir_build.cc \ + ir_build/atc_ir_common.cc \ LIBCLIENT_LOCAL_SRC_FILES := \ proto/ge_api.proto \ @@ -311,16 +313,20 @@ RUNNER_LOCAL_C_INCLUDES := \ $(LOCAL_PATH)/../ \ $(LOCAL_PATH)/../../ \ $(TOPDIR)inc \ - $(TOPDIR)inc/common \ + $(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ + $(TOPDIR)metadef/inc/common \ $(TOPDIR)inc/external \ - $(TOPDIR)inc/external/graph \ - $(TOPDIR)inc/framework \ - $(TOPDIR)inc/framework/common \ - $(TOPDIR)inc/graph \ + $(TOPDIR)metadef/inc/external \ + $(TOPDIR)graphengine/inc/external \ + 
$(TOPDIR)metadef/inc/external/graph \ + $(TOPDIR)graphengine/inc/external/ge \ + $(TOPDIR)graphengine/inc/framework \ + $(TOPDIR)graphengine/inc/framework/common \ + $(TOPDIR)metadef/inc/graph \ $(TOPDIR)inc/runtime \ $(TOPDIR)libc_sec/include \ $(TOPDIR)ops/built-in/op_proto/inc \ - $(TOPDIR)framework/domi/analyzer \ $(TOPDIR)graphengine/ge/analyzer \ $(TOPDIR)toolchain/ide/ide-daemon/external \ proto/fwk_adapter.proto \ @@ -403,6 +409,7 @@ LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc \ ../../out/ge/lib64/stub/ge_prof.cc \ + ../../out/ge/lib64/stub/ge_ir_build.cc \ LOCAL_SHARED_LIBRARIES := diff --git a/src/ge/graph/build/label_allocator.cc b/src/ge/graph/build/label_allocator.cc index f8fbe28b..dd9f1560 100644 --- a/src/ge/graph/build/label_allocator.cc +++ b/src/ge/graph/build/label_allocator.cc @@ -26,7 +26,7 @@ namespace ge { LabelAllocator::LabelAllocator(const ComputeGraphPtr &graph) : compute_graph_(graph) {} -Status LabelAllocator::AssignFunctionalLabels(uint32_t &label_index) { +Status LabelAllocator::AssignFunctionalLabels() { if (compute_graph_ == nullptr) { GELOGE(INTERNAL_ERROR, "ComputeGraph not set, Assign labels failed."); return INTERNAL_ERROR; @@ -42,7 +42,7 @@ Status LabelAllocator::AssignFunctionalLabels(uint32_t &label_index) { } // Add label for functional op. 
- label_index = 0; + uint32_t label_index = 0; for (auto node : functional_nodes) { LabelMakerPtr maker = LabelMakerFactory::Instance().Create(node->GetType(), compute_graph_, node); if (maker == nullptr) { @@ -56,7 +56,8 @@ Status LabelAllocator::AssignFunctionalLabels(uint32_t &label_index) { } } - GELOGI("AssignFunctionalLabels success."); + (void)AttrUtils::SetInt(*compute_graph_, ATTR_MODEL_LABEL_NUM, label_index); + GELOGI("AssignFunctionalLabels success, Num: %u.", label_index); return SUCCESS; } @@ -66,13 +67,29 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::setGetParentNode(); - if (parent == nullptr) { - GELOGE(INTERNAL_ERROR, "ComputeGraph owner not set: %s.", graph->GetName().c_str()); + if (graph->GetGraphUnknownFlag()) { + GELOGD("Graph[%s] is unknown graph, skip label allocator.", graph->GetName().c_str()); + return true; + } + + NodePtr func_node = graph->GetParentNode(); + if (func_node == nullptr) { + GELOGE(INTERNAL_ERROR, "Parent functional node not set: %s.", graph->GetName().c_str()); return false; } - (void)functional_nodes.insert(parent); // unique functional node. + ComputeGraphPtr owner_graph = func_node->GetOwnerComputeGraph(); + if (owner_graph == nullptr) { + GELOGE(INTERNAL_ERROR, "ComputeGraph owner not set: %s.", func_node->GetName().c_str()); + return false; + } + + if (owner_graph->GetGraphUnknownFlag()) { + GELOGD("Graph[%s] is unknown graph, skip label allocator.", owner_graph->GetName().c_str()); + return true; + } + + (void)functional_nodes.insert(func_node); // unique functional node. 
return true; } } // namespace ge diff --git a/src/ge/graph/build/label_allocator.h b/src/ge/graph/build/label_allocator.h index 01811e1d..2a58206e 100644 --- a/src/ge/graph/build/label_allocator.h +++ b/src/ge/graph/build/label_allocator.h @@ -28,7 +28,7 @@ class LabelAllocator { explicit LabelAllocator(const ComputeGraphPtr &graph); ~LabelAllocator() = default; - Status AssignFunctionalLabels(uint32_t &label_index); + Status AssignFunctionalLabels(); private: bool CollectFunctionalNode(ComputeGraphPtr &graph, std::set &functional_nodes); diff --git a/src/ge/graph/build/logical_stream_allocator.cc b/src/ge/graph/build/logical_stream_allocator.cc index 4d113f7e..9c1c3b93 100644 --- a/src/ge/graph/build/logical_stream_allocator.cc +++ b/src/ge/graph/build/logical_stream_allocator.cc @@ -348,7 +348,11 @@ Status NodeStreamUpdatePass::Run(ComputeGraphPtr graph, const vectorsubgraph_info.GetSubGraph(); for (NodePtr &node : compute_graph->GetDirectNode()) { GE_CHECK_NOTNULL(node->GetOpDesc()); - if (IsEngineSkip(*subgraph) && node->GetInNodes().empty()) { + if (node->GetOpDesc()->HasAttr(ATTR_NAME_RTS_LABEL_NODE)) { + node->GetOpDesc()->SetStreamId(context.default_stream); + GELOGD("Node %s of type %s in subgraph %s is assigned parent stream %ld (engine: %s).", node->GetName().c_str(), + node->GetType().c_str(), subgraph->name.c_str(), context.default_stream, engine_name.c_str()); + } else if (IsEngineSkip(*subgraph) && node->GetInNodes().empty()) { GELOGD("Node %s of type %s in subgraph %s doesn't need to assign a stream (engine: %s).", node->GetName().c_str(), node->GetType().c_str(), subgraph->name.c_str(), engine_name.c_str()); } else { diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc index c09f3ba7..db7b6854 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.cc +++ b/src/ge/graph/build/memory/block_mem_assigner.cc @@ -885,6 +885,15 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, 
size_t real_size, GELOGI("Unreusable block."); continue; } + std::string batch_label; + if (reusable_block->IsSameLabel(batch_label)) { + std::string op_label; + (void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, op_label); + if (batch_label != op_label) { + GELOGI("label diff, op name %s", node_op_desc->GetName().c_str()); + continue; + } + } // A node can reuse blocks of the same stream and preorder streams if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { diff --git a/src/ge/graph/build/memory/module.mk b/src/ge/graph/build/memory/module.mk index 2d669a50..73617794 100644 --- a/src/ge/graph/build/memory/module.mk +++ b/src/ge/graph/build/memory/module.mk @@ -11,12 +11,15 @@ local_lib_src_files := memory_assigner.cc \ local_lib_inc_path := ${LOCAL_PATH} \ ${TOPDIR}inc \ + ${TOPDIR}metadef/inc \ + ${TOPDIR}graphengine/inc \ ${TOPDIR}inc/external \ - ${TOPDIR}inc/external/graph \ + ${TOPDIR}metadef/inc/external \ + ${TOPDIR}graphengine/inc/external \ + ${TOPDIR}metadef/inc/external/graph \ $(TOPDIR)libc_sec/include \ ${TOPDIR}third_party/protobuf/include \ - ${TOPDIR}inc/framework \ - $(TOPDIR)framework/domi \ + ${TOPDIR}graphengine/inc/framework \ $(TOPDIR)graphengine/ge \ #compiler for host diff --git a/src/ge/graph/build/model_builder.cc b/src/ge/graph/build/model_builder.cc index 6285bced..ea2272fa 100644 --- a/src/ge/graph/build/model_builder.cc +++ b/src/ge/graph/build/model_builder.cc @@ -24,7 +24,6 @@ #include "graph/anchor.h" #include "graph/attr_value.h" #include "graph/buffer.h" -#include "graph/build/label_allocator.h" #include "graph/build/stream_allocator.h" #include "graph/common/omg_util.h" #include "graph/common/ge_call_wrapper.h" @@ -43,7 +42,6 @@ #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" -#include "graph/passes/memcpy_addr_async_pass.h" #include "init/gelib.h" #include "memory/memory_assigner.h" #include 
"omg/version.h" @@ -419,6 +417,14 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { return FAILED); GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_, p2p_mem_offset_, zero_copy_mem_size_); + string fp_ceiling_mode; + if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) { + if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { + GELOGE(FAILED, "Failed to set attr ATTR_FP_CEILING_MODE"); + return FAILED; + } + GELOGI("Set attr ATTR_FP_CEILING_MODE to model, value is %s.", fp_ceiling_mode.c_str()); + } string ge_core_type; Status ret = ge::GetContext().GetOption(kCoreType, ge_core_type); @@ -695,25 +701,8 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { GE_TIMESTAMP_END(AssignLogicalStreams, "GraphBuilder::AssignLogicalStreams"); // Assign functional op labels. - GE_TIMESTAMP_START(AssignFunctionalLabels); - LabelAllocator label_allocator(compute_graph_); - GE_CHK_STATUS_RET(label_allocator.AssignFunctionalLabels(label_num_), "Assign label failed."); - GE_TIMESTAMP_END(AssignFunctionalLabels, "ModelBuilder::AssignFunctionalLabels"); - - // Add memcpy_addr_async node. 
- rtFeatureType_t feature_type = FEATURE_TYPE_MEMCPY; - int32_t feature_info = MEMCPY_INFO_SUPPORT_ZEROCOPY; - int64_t value = 0; - rtError_t rt_ret = rtGetRtCapability(feature_type, feature_info, &value); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtGetRtCapability failed."); - return RT_FAILED; - } else { - GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode); - MemcpyAddrAsyncPass memcpy_addr; - GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph_), "Add memcpy_addr_async node failed."); - GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run."); - } + auto root_graph = GraphUtils::FindRootGraph(compute_graph_); + (void)AttrUtils::GetInt(*root_graph, ATTR_MODEL_LABEL_NUM, label_num_); GE_TIMESTAMP_START(AssignMemory); MemoryAssigner mem_assigner(compute_graph_); diff --git a/src/ge/graph/common/transop_util.cc b/src/ge/graph/common/transop_util.cc index eb80fb69..d90067b2 100644 --- a/src/ge/graph/common/transop_util.cc +++ b/src/ge/graph/common/transop_util.cc @@ -80,4 +80,13 @@ bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) { } return true; } + +std::string TransOpUtil::TransopMapToString() { + std::string buffer; + for (auto &key : Instance().transop_index_map_) { + buffer += key.first + " "; + } + return buffer; +} + } // namespace ge diff --git a/src/ge/graph/common/transop_util.h b/src/ge/graph/common/transop_util.h index 8b10ad5c..3332e1fb 100644 --- a/src/ge/graph/common/transop_util.h +++ b/src/ge/graph/common/transop_util.h @@ -35,6 +35,8 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { static bool CheckPrecisionLoss(const NodePtr &src_node); + static std::string TransopMapToString(); + private: TransOpUtil(); diff --git a/src/ge/graph/label/label_maker.cc b/src/ge/graph/label/label_maker.cc index 88b90199..3ef3c131 100644 --- a/src/ge/graph/label/label_maker.cc +++ b/src/ge/graph/label/label_maker.cc @@ -23,75 +23,65 @@ #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" 
-namespace { -const int64_t kInvalidStreamId = -1; -} // namespace - namespace ge { /** * @ingroup ge - * @brief Set stream id for head node. + * @brief Link node to graph head. * @param [in] graph: graph for add node. - * @param [in] op_desc: OpDesc for set logical stream id. + * @param [in] node: Node add to graph head. * @return: void */ -void LabelMaker::SetStreamIdEnter(const ComputeGraphPtr &graph, const OpDescPtr &op_desc) { - int64_t stream_id = kInvalidStreamId; - const auto &node_list = graph->GetDirectNode(); - for (size_t i = 0; i < node_list.size(); ++i) { - const auto &node = node_list.at(i); - GE_CHECK_NOTNULL_EXEC(node, continue); +void LabelMaker::LinkToGraphHead(const ComputeGraphPtr &graph, const NodePtr &node) { + static const std::set non_calc_types = {DATA, CONSTANT, CONSTANTOP, VARIABLE}; + for (auto &n : graph->GetDirectNode()) { + if (non_calc_types.count(n->GetType()) > 0) { + continue; + } - stream_id = node->GetOpDesc()->GetStreamId(); - if (stream_id != kInvalidStreamId) { - break; + const auto nodes = n->GetInDataNodes(); + if (nodes.empty()) { + continue; } - } - GELOGI("SetStreamId: Node %s assign stream is %ld.", op_desc->GetName().c_str(), stream_id); - op_desc->SetStreamId(stream_id); -} + bool is_head_node = true; + for (auto &in_node : nodes) { + if (non_calc_types.count(in_node->GetType()) == 0) { + is_head_node = false; + break; + } + } -/** - * @ingroup ge - * @brief Set stream id for tail node. - * @param [in] graph: graph for add node. - * @param [in] op_desc: OpDesc for set logical stream id. - * @return: void - */ -void LabelMaker::SetStreamIdLeave(const ComputeGraphPtr &graph, const OpDescPtr &op_desc) { - int64_t stream_id = kInvalidStreamId; - const auto &node_list = graph->GetDirectNode(); - for (size_t i = node_list.size(); i > 0; --i) { - const auto &node = node_list.at(i - 1); // i from list size, need shift 1. 
- GE_CHECK_NOTNULL_EXEC(node, continue); + if (!is_head_node) { + continue; + } - stream_id = node->GetOpDesc()->GetStreamId(); - if (stream_id != kInvalidStreamId) { - break; + if (GraphUtils::AddEdge(node->GetOutControlAnchor(), n->GetInControlAnchor()) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add ctrl edge from %s to %s failed.", node->GetName().c_str(), n->GetName().c_str()); } } - - GELOGI("SetStreamId: Node %s assign stream is %ld.", op_desc->GetName().c_str(), stream_id); - op_desc->SetStreamId(stream_id); } /** * @ingroup ge - * @brief Set stream id for parent node. + * @brief Link node to graph tail. * @param [in] graph: graph for add node. - * @param [in] op_desc: OpDesc for set logical stream id. + * @param [in] node: Node add to graph tail. * @return: void */ -void LabelMaker::SetStreamIdOwner(const ComputeGraphPtr &graph, const OpDescPtr &op_desc) { - int64_t stream_id = kInvalidStreamId; - const auto &node = graph->GetParentNode(); - if (node != nullptr) { - stream_id = node->GetOpDesc()->GetStreamId(); - } +void LabelMaker::LinkToGraphTail(const ComputeGraphPtr &graph, const NodePtr &node) { + auto tail = graph->FindFirstNodeMatchType(NETOUTPUT); + while (tail != nullptr) { + auto nodes = tail->GetOutControlNodes(); + if (!nodes.empty()) { + tail = nodes.at(0); + continue; + } - GELOGI("SetStreamId: Node %s assign stream is %ld.", op_desc->GetName().c_str(), stream_id); - op_desc->SetStreamId(stream_id); + if (GraphUtils::AddEdge(tail->GetOutControlAnchor(), node->GetInControlAnchor()) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add ctrl edge from %s to %s failed.", tail->GetName().c_str(), node->GetName().c_str()); + } + return; + } } /** @@ -112,7 +102,7 @@ NodePtr LabelMaker::AddStreamActive(const ComputeGraphPtr &graph, const std::str OpDescPtr op_desc = MakeShared(name, STREAMACTIVE); GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); - SetStreamIdOwner(graph, op_desc); + (void)AttrUtils::SetBool(op_desc, ATTR_NAME_RTS_LABEL_NODE, true); 
GELOGI("StreamActive: Create node %s.", op_desc->GetName().c_str()); vector active_streams; @@ -122,6 +112,7 @@ NodePtr LabelMaker::AddStreamActive(const ComputeGraphPtr &graph, const std::str NodePtr stream_active = graph->AddNodeFront(op_desc); GE_CHECK_NOTNULL_EXEC(stream_active, return nullptr); + LinkToGraphHead(graph, stream_active); return stream_active; } @@ -146,7 +137,7 @@ NodePtr LabelMaker::AddLabelSetEnter(const ComputeGraphPtr &graph, const std::st OpDescPtr op_desc = MakeShared(name, LABELSET); GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); - SetStreamIdOwner(graph, op_desc); + (void)AttrUtils::SetBool(op_desc, ATTR_NAME_RTS_LABEL_NODE, true); GELOGI("LabelSet: Create node %s.", op_desc->GetName().c_str()); (void)AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, index); @@ -173,19 +164,9 @@ NodePtr LabelMaker::AddLabelSetEnter(const ComputeGraphPtr &graph, const std::st NodePtr LabelMaker::AddLabelSetLeave(const ComputeGraphPtr &graph, const std::string &name, uint32_t index) { GE_CHECK_NOTNULL_EXEC(graph, return nullptr); - const auto &node_list = graph->GetDirectNode(); - auto it = node_list.end(); - if (it == node_list.begin()) { - GELOGE(INTERNAL_ERROR, "LabelSet: Graph %s node is empty.", graph->GetName().c_str()); - return nullptr; - } - --it; - const NodePtr &node = *it; - GE_CHECK_NOTNULL_EXEC(node, return nullptr); - OpDescPtr op_desc = MakeShared(name, LABELSET); GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); - SetStreamIdOwner(graph, op_desc); + (void)AttrUtils::SetBool(op_desc, ATTR_NAME_RTS_LABEL_NODE, true); GELOGI("LabelSet: Create node %s.", op_desc->GetName().c_str()); (void)AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, index); @@ -194,11 +175,7 @@ NodePtr LabelMaker::AddLabelSetLeave(const ComputeGraphPtr &graph, const std::st GE_CHECK_NOTNULL_EXEC(label_set, return nullptr); // Link control edge to graph tail. 
- if (GraphUtils::AddEdge(node->GetOutControlAnchor(), label_set->GetInControlAnchor()) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "LabelSet: Add ctrl edge to %s failed.", node->GetName().c_str()); - return nullptr; - } - + LinkToGraphTail(graph, label_set); return label_set; } @@ -222,7 +199,7 @@ NodePtr LabelMaker::AddLabelGotoEnter(const ComputeGraphPtr &graph, const std::s OpDescPtr op_desc = MakeShared(name, LABELGOTOEX); GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); - SetStreamIdOwner(graph, op_desc); + (void)AttrUtils::SetBool(op_desc, ATTR_NAME_RTS_LABEL_NODE, true); GELOGI("LabelGoto: Create node %s.", op_desc->GetName().c_str()); (void)AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, index); @@ -246,32 +223,17 @@ NodePtr LabelMaker::AddLabelGotoEnter(const ComputeGraphPtr &graph, const std::s NodePtr LabelMaker::AddLabelGotoLeave(const ComputeGraphPtr &graph, const std::string &name, uint32_t index) { GE_CHECK_NOTNULL_EXEC(graph, return nullptr); - const auto &node_list = graph->GetDirectNode(); - auto it = node_list.end(); - if (it == node_list.begin()) { - GELOGE(INTERNAL_ERROR, "LabelGoto: Graph %s node is empty.", graph->GetName().c_str()); - return nullptr; - } - --it; - const NodePtr &node = *it; - GE_CHECK_NOTNULL_EXEC(node, return nullptr); - OpDescPtr op_desc = MakeShared(name, LABELGOTOEX); GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); - SetStreamIdLeave(graph, op_desc); + (void)AttrUtils::SetBool(op_desc, ATTR_NAME_RTS_LABEL_NODE, true); GELOGI("LabelGoto: Create node %s.", op_desc->GetName().c_str()); (void)AttrUtils::SetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, index); NodePtr label_goto = graph->AddNode(op_desc); GE_CHECK_NOTNULL_EXEC(label_goto, return nullptr); - SetStreamIdOwner(graph, op_desc); // Link control edge to graph tail. 
- if (GraphUtils::AddEdge(node->GetOutControlAnchor(), label_goto->GetInControlAnchor()) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "LabelGoto: Add ctrl edge to %s failed.", node->GetName().c_str()); - return nullptr; - } - + LinkToGraphTail(graph, label_goto); return label_goto; } @@ -297,7 +259,7 @@ NodePtr LabelMaker::AddLabelSwitchEnter(const ComputeGraphPtr &graph, const std: OpDescPtr op_desc = MakeShared(name, LABELSWITCHBYINDEX); GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); - SetStreamIdOwner(graph, op_desc); + (void)AttrUtils::SetBool(op_desc, ATTR_NAME_RTS_LABEL_NODE, true); GELOGI("LabelSwitchByIndex: Create node %s.", op_desc->GetName().c_str()); if (op_desc->AddInputDesc(desc) != GRAPH_SUCCESS) { @@ -332,19 +294,9 @@ NodePtr LabelMaker::AddLabelSwitchLeave(const ComputeGraphPtr &graph, const std: const std::vector &labels) { GE_CHECK_NOTNULL_EXEC(graph, return nullptr); - const auto &node_list = graph->GetDirectNode(); - auto it = node_list.end(); - if (it == node_list.begin()) { - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Graph %s node is empty.", graph->GetName().c_str()); - return nullptr; - } - --it; - const NodePtr &node = *it; - GE_CHECK_NOTNULL_EXEC(node, return nullptr); - OpDescPtr op_desc = MakeShared(name, LABELSWITCHBYINDEX); GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); - SetStreamIdOwner(graph, op_desc); + (void)AttrUtils::SetBool(op_desc, ATTR_NAME_RTS_LABEL_NODE, true); GELOGI("LabelSwitchByIndex: Create node %s.", op_desc->GetName().c_str()); if (op_desc->AddInputDesc(desc) != GRAPH_SUCCESS) { @@ -361,11 +313,7 @@ NodePtr LabelMaker::AddLabelSwitchLeave(const ComputeGraphPtr &graph, const std: GE_CHECK_NOTNULL_EXEC(label_switch, return nullptr); // Link control edge to graph tail. 
- if (GraphUtils::AddEdge(node->GetOutControlAnchor(), label_switch->GetInControlAnchor()) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "LabelSwitchByIndex: Add ctrl edge to %s failed.", node->GetName().c_str()); - return nullptr; - } - + LinkToGraphTail(graph, label_switch); return label_switch; } @@ -385,7 +333,6 @@ NodePtr LabelMaker::AddLabelSwitchIndex(const ComputeGraphPtr &graph, const std: OpDescPtr op_desc = MakeShared(name, DATA); GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); - op_desc->SetStreamId(kInvalidStreamId); GELOGI("Data: Create node %s.", op_desc->GetName().c_str()); if (op_desc->AddInputDesc(desc) != GRAPH_SUCCESS) { diff --git a/src/ge/graph/label/label_maker.h b/src/ge/graph/label/label_maker.h index 759bf5cf..9575d535 100644 --- a/src/ge/graph/label/label_maker.h +++ b/src/ge/graph/label/label_maker.h @@ -60,9 +60,8 @@ class LabelMaker { ComputeGraphPtr parent_graph_; private: - void SetStreamIdEnter(const ComputeGraphPtr &graph, const OpDescPtr &op_desc); - void SetStreamIdLeave(const ComputeGraphPtr &graph, const OpDescPtr &op_desc); - void SetStreamIdOwner(const ComputeGraphPtr &graph, const OpDescPtr &op_desc); + void LinkToGraphHead(const ComputeGraphPtr &graph, const NodePtr &node); + void LinkToGraphTail(const ComputeGraphPtr &graph, const NodePtr &node); }; } // namespace ge #endif // GE_GRAPH_PASSES_LABEL_MAKER_H_ diff --git a/src/ge/graph/load/new_model_manager/data_dumper.h b/src/ge/graph/load/new_model_manager/data_dumper.h index c1a102ad..01318da2 100644 --- a/src/ge/graph/load/new_model_manager/data_dumper.h +++ b/src/ge/graph/load/new_model_manager/data_dumper.h @@ -86,6 +86,7 @@ class DataDumper { void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; } const DumpProperties &GetDumpProperties() const { return dump_properties_; } bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const; + const std::vector &GetAllOpDescInfo() const { return op_desc_info_; 
} // Dump exception info Status DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file); diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc index d589bd8a..faa7da59 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.cc +++ b/src/ge/graph/load/new_model_manager/davinci_model.cc @@ -88,6 +88,9 @@ const uint32_t kDataMemAlignSizeCompare = 64; const uint32_t kDumpL1FusionOpMByteSize = 2 * 1024 * 1024; const uint32_t kDumpFlagOfL1Fusion = 0; const char *const kDefaultBatchLable = "Batch_default"; +const int32_t kInvalidStream = -1; +const uint32_t kEndOfSequence = 0x0704000a; +const uint32_t kEndOfSequenceNew = 507005; inline bool IsDataOp(const std::string &node_type) { return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE; @@ -259,7 +262,6 @@ Status DavinciModel::Assign(const GeModelPtr &ge_model) { /// void DavinciModel::Shrink() { ge_model_.reset(); // delete object. 
- op_list_.clear(); } Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { @@ -612,7 +614,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size GE_DISMISS_GUARD(stream); stream_list_.push_back(stream); - GELOGD("Stream index:%u, stream:%p.", i, stream); + int32_t rt_stream_id = kInvalidStream; + (void)rtGetStreamId(stream, &rt_stream_id); + GELOGI("Logical stream index:%u, stream:%p, rtstream: %d.", i, stream, rt_stream_id); } for (uint32_t i = 0; i < EventNum(); i++) { @@ -654,18 +658,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size GE_IF_BOOL_EXEC(IsBroadCastOpData(node), (void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore");); } - // for profiling - op_name_map_ = compute_graph->GetGraphOpName(); - - vector op_name; - GE_IF_BOOL_EXEC(ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_TASK_INDEX_OP_NAME, op_name), - GELOGI("get str of task_index_op_name")); - if (op_name_map_.empty()) { - for (size_t idx = 0; idx < op_name.size(); idx++) { - op_name_map_[idx] = op_name[idx]; - } - GELOGI("Infer profiling: op_name_size(%zu)", op_name.size()); - } GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed"); @@ -677,7 +669,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size auto all_dump_model = GetDumpProperties().GetAllDumpModel(); bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); - if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) { + bool dump_l1fusion_op = + (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || findByOmName || findByModelName; + if (dump_l1fusion_op) { // malloc 2M for dump l1fusion op GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); @@ -691,16 +685,21 @@ Status 
DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer(); (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); + string fp_ceiling_mode; + if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { + GELOGI("Get attr ATTR_FP_CEILING_MODE from model, value is %s.", fp_ceiling_mode.c_str()); + // mode 0: Do not perform saturation processing. By default, IEEE754 is used. + GE_CHK_RT_RET(rtSetCtxINFMode((fp_ceiling_mode != "0"))); + } + // collect profiling for ge - if (ProfilingManager::Instance().ProfilingModelLoadOn()) { - std::vector compute_graph_desc_info; - Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info); - if (ret1 != SUCCESS) { - GELOGE(ret1, "GetComputeGraphInfo failed."); - return ret1; + auto &profiling_manager = ProfilingManager::Instance(); + if (profiling_manager.ProfilingModelLoadOn()) { + Status p_ret = ReportProfilingData(!profiling_manager.IsAclApiMode()); + if (p_ret != SUCCESS) { + GELOGE(p_ret, "Report profiling data failed."); + return p_ret; } - ProfilingManager::Instance().ReportProfilingData(GetTaskDescInfo(), compute_graph_desc_info); - GE_CHK_STATUS(SinkModelProfile(), "Sink model profile failed."); } Shrink(); @@ -708,6 +707,20 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size return ret; } +Status DavinciModel::ReportProfilingData(bool check_device) { + std::vector compute_graph_desc_info; + Status ret = GetComputeGraphInfo(compute_graph_desc_info); + if (ret != SUCCESS) { + GELOGE(ret, "GetComputeGraphInfo failed."); + return ret; + } + ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info, check_device); + GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); + op_list_.clear(); + + return SUCCESS; +} + /// /// @ingroup ge /// @brief Travel all nodes and determine if destruction is 
required. @@ -2572,7 +2585,7 @@ void *DavinciModel::Run(DavinciModel *model) { GE_TIMESTAMP_START(rtStreamSynchronize); GELOGI("rtStreamSynchronize start."); rt_ret = rtStreamSynchronize(model->rt_model_stream_); - if (rt_ret == RT_ERROR_END_OF_SEQUENCE) { + if (rt_ret == kEndOfSequence || rt_ret == kEndOfSequenceNew) { seq_end_flag = true; } GE_IF_BOOL_EXEC( @@ -2901,34 +2914,25 @@ Status DavinciModel::DistributeTask() { SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); } } - // get op_name by task_index - if (task->GetCtx() != nullptr) { - auto iter = op_name_map_.find(task_index); - if (iter == op_name_map_.end()) { - continue; - } - - // else task index is found in op_name_map_ - TaskDescInfo task_desc_info; - string op_name = op_name_map_[task_index]; - if (!om_name_.empty()) { - task_desc_info.model_name = om_name_; - } else { - task_desc_info.model_name = name_; - } - task_desc_info.op_name = op_name; - task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim(); - task_desc_info.task_id = task->GetTaskID(); - task_desc_info.stream_id = task->GetStreamId(); - task_desc_info_.emplace_back(task_desc_info); - if (flag) { - if (task->GetSktTaskID() != 0xFFFFFFFF) { - TaskDescInfo task_desc_info; - string op_name = "super_kernel_" + to_string(task_index); - task_desc_info.op_name = op_name; - task_desc_info.task_id = task->GetSktTaskID(); - task_desc_info_.emplace_back(task_desc_info); - } + // Load task info for profiling + TaskDescInfo task_desc_info; + if (!om_name_.empty()) { + task_desc_info.model_name = om_name_; + } else { + task_desc_info.model_name = name_; + } + task_desc_info.op_name = op->GetName(); + task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim(); + task_desc_info.task_id = task->GetTaskID(); + task_desc_info.stream_id = task->GetStreamId(); + task_desc_info_.emplace_back(task_desc_info); + if (flag) { + if (task->GetSktTaskID() != 0xFFFFFFFF) { + TaskDescInfo 
task_desc_info; + string op_name = "super_kernel_" + to_string(task_index); + task_desc_info.op_name = op_name; + task_desc_info.task_id = task->GetSktTaskID(); + task_desc_info_.emplace_back(task_desc_info); } } } @@ -3818,50 +3822,31 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea main_follow_stream_mapping_[main_stream_id].emplace_back(stream); } -Status DavinciModel::GetComputeGraphInfo(const ComputeGraphPtr &graph, vector &graph_desc_info) { +Status DavinciModel::GetComputeGraphInfo(vector &graph_desc_info) { GELOGI("GetComputeGraphInfo start."); - for (auto &node : graph->GetAllNodes()) { + auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); + for (auto &op_desc : all_op_desc) { ComputeGraphDescInfo compute_graph_info; - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(PARAM_INVALID, "op_desc is nullptr."); - return PARAM_INVALID; + if (!om_name_.empty()) { + compute_graph_info.model_name = om_name_; + } else { + compute_graph_info.model_name = name_; } + compute_graph_info.op_name = op_desc.op_name; + compute_graph_info.op_type = op_desc.op_type; + compute_graph_info.input_format = op_desc.input_format; + compute_graph_info.input_shape = op_desc.input_shape; + compute_graph_info.input_data_type = op_desc.input_data_type; + compute_graph_info.output_format = op_desc.output_format; + compute_graph_info.output_shape = op_desc.output_shape; + compute_graph_info.output_data_type = op_desc.output_data_type; - auto op_mode = static_cast(domi::ImplyType::INVALID); - if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && - op_mode == static_cast(domi::ImplyType::TVM)) { - if (!om_name_.empty()) { - compute_graph_info.model_name = om_name_; - } else { - compute_graph_info.model_name = name_; - } - compute_graph_info.op_name = op_desc->GetName(); - compute_graph_info.op_type = op_desc->GetType(); - - for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { - GeTensorDescPtr input_desc = 
op_desc->MutableInputDesc(i); - if (input_desc == nullptr) { - continue; - } - compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); - compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); - compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); - } - - for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { - GeTensorDesc output_desc = op_desc->GetOutputDesc(j); - compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); - compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); - compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); - } - - graph_desc_info.emplace_back(compute_graph_info); - } + graph_desc_info.emplace_back(compute_graph_info); } GELOGI("GetComputeGraphInfo end."); return SUCCESS; } + void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) { if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) { tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_; diff --git a/src/ge/graph/load/new_model_manager/davinci_model.h b/src/ge/graph/load/new_model_manager/davinci_model.h index 8cc824f4..85b2922f 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.h +++ b/src/ge/graph/load/new_model_manager/davinci_model.h @@ -439,6 +439,8 @@ class DavinciModel { Status SinkTimeProfile(const InputData &current_data); + Status ReportProfilingData(bool check_device = true); + void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); } @@ -828,7 +830,7 @@ class DavinciModel { Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); // get desc info of graph for profiling - Status GetComputeGraphInfo(const ComputeGraphPtr &graph, vector &graph_desc_info); + Status GetComputeGraphInfo(vector &graph_desc_info); void 
SetDataDumperArgs(const ComputeGraphPtr &compute_graph); @@ -947,7 +949,6 @@ class DavinciModel { std::map used_tbe_handle_map_; // for profiling task and graph info - std::map op_name_map_; std::vector task_desc_info_; int64_t maxDumpOpNum_; diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc index 894a3df2..e3c6873c 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.cc +++ b/src/ge/graph/load/new_model_manager/model_manager.cc @@ -43,6 +43,8 @@ const std::string kCmdTypeProfInit = "prof_init"; const std::string kCmdTypeProfFinalize = "prof_finalize"; const std::string kCmdTypeProfStart = "prof_start"; const std::string kCmdTypeProfStop = "prof_stop"; +const std::string kCmdTypeProfModelSubscribe = "prof_model_subscribe"; +const std::string kCmdTypeProfModelUnsubscribe = "prof_model_cancel_subscribe"; const char *const kBatchLoadBuf = "batchLoadsoFrombuf"; const char *const kDeleteCustOp = "deleteCustOp"; struct CustAicpuSoBuf { @@ -334,11 +336,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptrSetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + - timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond - davinci_model->SetProfileTime(MODEL_LOAD_END); - } + davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond + davinci_model->SetProfileTime(MODEL_LOAD_END); } while (0); GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); @@ -562,10 +562,15 @@ Status ModelManager::Stop(uint32_t model_id) { /// Status ModelManager::HandleCommand(const Command &command) { static const std::map> cmds = { - {kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, - {kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, - {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, 
HandleProfStartCommand}, - {kCmdTypeProfStop, HandleProfStopCommand}}; + {kCmdTypeProfile, HandleProfileCommand}, + {kCmdTypeDump, HandleDumpCommand}, + {kCmdTypeProfiling, HandleAclProfilingCommand}, + {kCmdTypeProfInit, HandleProfInitCommand}, + {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, + {kCmdTypeProfStart, HandleProfStartCommand}, + {kCmdTypeProfStop, HandleProfStopCommand}, + {kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand}, + {kCmdTypeProfModelUnsubscribe, HandleProfModelUnsubscribeCommand}}; auto iter = cmds.find(command.cmd_type); if (iter == cmds.end()) { @@ -591,6 +596,76 @@ Status ModelManager::HandleAclProfilingCommand(const Command &command) { return SUCCESS; } +Status ModelManager::GetModelByCmd(const Command &command, std::shared_ptr &davinci_model) { + if (command.cmd_params.size() < kCmdParSize) { + GELOGE(PARAM_INVALID, "When the cmd_type is '%s', the size of cmd_params must larger than 2.", + command.cmd_type.c_str()); + return PARAM_INVALID; + } + + std::string map_key = command.cmd_params[0]; + std::string value = command.cmd_params[1]; + if (map_key == PROFILE_MODEL_ID) { + int32_t model_id = 0; + try { + model_id = std::stoi(value); + } catch (std::invalid_argument &) { + GELOGE(PARAM_INVALID, "Model id: %s is invalid.", value.c_str()); + return PARAM_INVALID; + } catch (std::out_of_range &) { + GELOGE(PARAM_INVALID, "Model id: %s is out of range.", value.c_str()); + return PARAM_INVALID; + } catch (...) 
{ + GELOGE(FAILED, "Model id: %s cannot change to int.", value.c_str()); + return FAILED; + } + + auto model_manager = ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + davinci_model = model_manager->GetModel(static_cast(model_id)); + if (davinci_model == nullptr) { + GELOGE(FAILED, "Model id: %d is invaild or model is not loaded.", model_id); + return FAILED; + } + } else { + GELOGE(FAILED, "The model_id parameter is not found in the command."); + return FAILED; + } + + return SUCCESS; +} + +Status ModelManager::HandleProfModelSubscribeCommand(const Command &command) { + std::shared_ptr davinci_model = nullptr; + Status ret = GetModelByCmd(command, davinci_model); + if (ret != SUCCESS) { + return ret; + } + + if (ProfilingManager::Instance().ProfModelSubscribe(command.module_index, static_cast(davinci_model.get())) != + SUCCESS) { + GELOGE(FAILED, "Handle prof model subscribe failed."); + return FAILED; + } + + return SUCCESS; +} + +Status ModelManager::HandleProfModelUnsubscribeCommand(const Command &command) { + std::shared_ptr davinci_model = nullptr; + Status ret = GetModelByCmd(command, davinci_model); + if (ret != SUCCESS) { + return ret; + } + + if (ProfilingManager::Instance().ProfModelUnsubscribe(static_cast(davinci_model.get())) != SUCCESS) { + GELOGE(FAILED, "Handle prof model unsubscribe failed."); + return FAILED; + } + + return SUCCESS; +} + Status ModelManager::HandleProfInitCommand(const Command &command) { uint64_t module_index = command.module_index; if (ProfilingManager::Instance().ProfInit(module_index) != SUCCESS) { @@ -973,11 +1048,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model GELOGI("Parse model %u success.", model_id); - if (ProfilingManager::Instance().ProfilingModelLoadOn()) { - davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + - timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond - davinci_model->SetProfileTime(MODEL_LOAD_END); - } + 
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond + davinci_model->SetProfileTime(MODEL_LOAD_END); GE_IF_BOOL_EXEC(ret == SUCCESS, device_count++); return SUCCESS; diff --git a/src/ge/graph/load/new_model_manager/model_manager.h b/src/ge/graph/load/new_model_manager/model_manager.h index 8e23b0ae..f2a8b9c8 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.h +++ b/src/ge/graph/load/new_model_manager/model_manager.h @@ -158,10 +158,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { static ge::Status HandleAclProfilingCommand(const Command &command); static ge::Status HandleProfileCommand(const Command &command); static ge::Status HandleDumpCommand(const Command &command); + static ge::Status HandleProfModelSubscribeCommand(const Command &command); + static ge::Status HandleProfModelUnsubscribeCommand(const Command &command); static ge::Status HandleProfInitCommand(const Command &command); static ge::Status HandleProfFinalizeCommand(const Command &command); static ge::Status HandleProfStartCommand(const Command &command); static ge::Status HandleProfStopCommand(const Command &command); + + static ge::Status GetModelByCmd(const Command &command, std::shared_ptr &davinci_model); /// /// @ingroup domi_ome /// @brief get model memory usage diff --git a/src/ge/graph/load/new_model_manager/zero_copy_task.cc b/src/ge/graph/load/new_model_manager/zero_copy_task.cc index 7db9c459..4a083f33 100644 --- a/src/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/src/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -45,7 +45,7 @@ Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) { if (it == task_addr_offset_.end()) { task_addr_offset_[addr] = {offset}; } else { - it->second.push_back(offset); + it->second.insert(offset); } GELOGI("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", 
name_.c_str(), addr, diff --git a/src/ge/graph/load/new_model_manager/zero_copy_task.h b/src/ge/graph/load/new_model_manager/zero_copy_task.h index c83387e8..7b428e5c 100644 --- a/src/ge/graph/load/new_model_manager/zero_copy_task.h +++ b/src/ge/graph/load/new_model_manager/zero_copy_task.h @@ -99,7 +99,7 @@ class ZeroCopyTask { bool is_updated_; string batch_label_; //
- map> task_addr_offset_; + map> task_addr_offset_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_TASK_H_ diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc index e0a6d751..17fa1299 100644 --- a/src/ge/graph/manager/graph_manager.cc +++ b/src/ge/graph/manager/graph_manager.cc @@ -100,6 +100,8 @@ #include "graph/passes/subgraph_const_migration_pass.h" #include "graph/passes/unused_args_clean_pass.h" #include "graph/passes/global_step_insert_pass.h" +#include "graph/passes/memcpy_addr_async_pass.h" +#include "graph/build/label_allocator.h" #include "graph/utils/tensor_adapter.h" #include "graph/utils/type_utils.h" #include "graph/graph_util.h" @@ -131,6 +133,22 @@ bool IsTailingOptimization() { GELOGW("OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION not set, use BFSTopologicalSorting by default."); return false; } + +ge::Status CheckFpCeilingMode() { + static const std::unordered_set kValidFpCeilingMode = {"0", "1", "2"}; + string mode; + auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode); + if (ret == ge::GRAPH_SUCCESS) { + if (kValidFpCeilingMode.count(mode) == 0) { + GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "The fp_ceiling_mode %s is invalid, options are 0, 1, and 2.", mode.c_str()); + return ge::GE_GRAPH_OPTIONS_INVALID; + } + GELOGI("The parameter fp_ceiling_mode is set to %s.", mode.c_str()); + return ge::SUCCESS; + } + GELOGW("The parameter fp_ceiling_mode is not set."); + return ge::SUCCESS; +} } // namespace namespace ge { @@ -162,6 +180,12 @@ Status GraphManager::Initialize(const std::map &options) { return ret; } + ret = CheckFpCeilingMode(); + if (ret != SUCCESS) { + GELOGE(ret, "[Initialize] Check fp-ceiling-mode options failed."); + return ret; + } + ret = graph_context_->Initialize(options); if (ret != SUCCESS) { GELOGE(ret, "[Initialize] GraphContext initialize failed."); @@ -325,6 +349,78 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, return SUCCESS; } 
+Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &graph, + const std::map &options, + const OmgContext &omg_context) { + if (HasGraphNode(graph_id)) { + GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u.", graph_id); + return GE_GRAPH_GRAPH_ALREADY_EXIST; + } + auto compute_graph = GraphUtils::GetComputeGraph(graph); + if (compute_graph != nullptr) { + compute_graph->SetGraphID(graph_id); + bool graph_has_been_added = false; + if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added) && + graph_has_been_added) { + GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] same graph object can not be added again, graph_id = %u.", + graph_id); + return GE_GRAPH_GRAPH_ALREADY_EXIST; + } + } else { + GELOGE(FAILED, "compute graph is null"); + return FAILED; + } + std::vector input_nodes; + std::vector output_nodes; + auto new_compute_graph = GraphUtils::CloneGraph(compute_graph, "", input_nodes, output_nodes); + std::string session_graph_id; + if (!AttrUtils::GetStr(*new_compute_graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id) || + session_graph_id.empty()) { + session_graph_id = "-1_" + to_string(graph_id); + if (!AttrUtils::SetStr(*new_compute_graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id)) { + GELOGW("Set attribute of compute graph failed."); + } + for (auto &subgraph : new_compute_graph->GetAllSubgraphs()) { + (void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); + } + GELOGW("Get graph session_graph_id attr failed, set session id to default value: [0]"); + } + + GraphNodePtr graph_node = MakeShared(graph_id); + if (graph_node == nullptr) { + GELOGE(FAILED, "GraphNode make shared failed"); + return FAILED; + } + std::shared_ptr graph_ptr = GraphUtils::CreateGraphPtrFromComputeGraph(new_compute_graph); + if (graph_ptr == nullptr) { + GELOGE(FAILED, "GraphPtr make shared failed"); + return FAILED; + } + + graph_node->SetGraph(graph_ptr); + 
graph_node->SetOptions(options); + AddGraphNode(graph_id, graph_node); + + AddLocalOmgContext(graph_id, omg_context); + if (!options_.output_datatype.empty()) { + GetLocalOmgContext().output_type = options_.output_datatype; + } + + CompilerStages &stages = GetCompilerStages(graph_id); + stages.preparer.SetOptions(options_); + Status status = stages.optimizer.SetOptions(options_); + if (status != SUCCESS) { + GELOGE(status, "Graph optimizer set options failed."); + return status; + } + stages.builder.SetOptions(options_); + + var_acc_ctrl_.AddGraph(graph_id, new_compute_graph); + + GELOGI("[GraphManager] add graph success, graph_id = %u.", graph_id); + return SUCCESS; +} + Status GraphManager::MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::ComputeGraphPtr &original_compute_graph, GraphId root_graph_id) { std::shared_ptr instance_ptr = ge::GELib::GetInstance(); @@ -625,6 +721,13 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, compute_graph); + + Status ret = compute_graph->TopologicalSorting(); + if (ret != SUCCESS) { + GELOGE(ret, "Graph topological sort failed, ret:%d.", ret); + return ret; + } + GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); GELOGI("PreRun:PreRunAfterOptimizeSubGraph success."); return SUCCESS; @@ -2170,6 +2273,18 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { return ret; } + // Assign functional op labels. + GE_TIMESTAMP_START(AssignFunctionalLabels); + LabelAllocator label_allocator(compute_graph); + GE_CHK_STATUS_RET(label_allocator.AssignFunctionalLabels(), "Assign label failed."); + GE_TIMESTAMP_END(AssignFunctionalLabels, "ModelBuilder::AssignFunctionalLabels"); + + // Add memcpy addr asynchronous node. 
+ GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode); + MemcpyAddrAsyncPass memcpy_addr; + GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph), "Add memcpy_addr_async node failed."); + GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run."); + // After while sub graph handle, mark all node rw type auto result = GetCompilerStages(compute_graph->GetGraphID()).optimizer.HandleMemoryRWConflict(compute_graph); if (result != SUCCESS) { @@ -2180,11 +2295,6 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { ChangeConstTypeWhenTraining(compute_graph); - ret = compute_graph->TopologicalSorting(); - if (ret != SUCCESS) { - GELOGE(ret, "Graph topological sort failed, ret:%d.", ret); - return ret; - } GELOGI("End optimize after merge sub graph."); return SUCCESS; } diff --git a/src/ge/graph/manager/graph_manager.h b/src/ge/graph/manager/graph_manager.h index 0a492cfb..3f2cfa9e 100644 --- a/src/ge/graph/manager/graph_manager.h +++ b/src/ge/graph/manager/graph_manager.h @@ -73,6 +73,16 @@ class GraphManager { Status AddGraph(const GraphId &graph_id, const Graph &graph, const std::map &options, const OmgContext &omg_context); + /// + /// @ingroup ge_graph + /// @brief add a copy graph + /// @param [in] graph_id graph id + /// @param [out] Graph output graph + /// @return Status result of function + /// + Status AddGraphWithCopy(const GraphId &graph_id, const Graph &graph, + const std::map &options, const OmgContext &omg_context); + /// /// @ingroup ge_graph /// @brief remove specific graph diff --git a/src/ge/graph/manager/rdma_pool_allocator.cc b/src/ge/graph/manager/rdma_pool_allocator.cc index feea06d9..0a5b9174 100644 --- a/src/ge/graph/manager/rdma_pool_allocator.cc +++ b/src/ge/graph/manager/rdma_pool_allocator.cc @@ -202,7 +202,7 @@ Status RdmaPoolAllocator::GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size) { GELOGE(INTERNAL_ERROR, "Rdma base addr is nullptr."); return INTERNAL_ERROR; } - base_addr = 
reinterpret_cast(reinterpret_cast(rdma_base_addr_)); + base_addr = static_cast(reinterpret_cast(rdma_base_addr_)); mem_size = rdma_mem_size_; return SUCCESS; } diff --git a/src/ge/graph/optimize/mem_rw_conflict_optimize.cc b/src/ge/graph/optimize/mem_rw_conflict_optimize.cc index d59f5928..02f57b24 100644 --- a/src/ge/graph/optimize/mem_rw_conflict_optimize.cc +++ b/src/ge/graph/optimize/mem_rw_conflict_optimize.cc @@ -681,6 +681,11 @@ Status GraphOptimize::HandleMemoryRWConflict(ComputeGraphPtr &compute_graph) { } // 2.loop all node, including node in subgraph and handle memory rw conflict for (auto &node : compute_graph->GetAllNodes()) { + // ignore while subgraph node + const auto parent_node = node->GetOwnerComputeGraph()->GetParentNode(); + if ((parent_node != nullptr) && (kWhileOpTypes.count(parent_node->GetType()) > 0)) { + continue; + } // ignore data / netoutput of subgraph if (node->GetType() == DATA && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX)) { continue; diff --git a/src/ge/graph/partition/graph_partition.cc b/src/ge/graph/partition/graph_partition.cc index e6c7e64f..8d4bcf51 100644 --- a/src/ge/graph/partition/graph_partition.cc +++ b/src/ge/graph/partition/graph_partition.cc @@ -534,6 +534,7 @@ Status ge::GraphPartitioner::Initialize(ge::ComputeGraphPtr compute_graph) { } const NodeEngineMap *node_engine_map = graph_info_.engine_placer_.GetNodeEngineMap(); size_t temp_index = 0; + // traverse nodes by topo order one by one for (const auto &node : compute_graph->GetDirectNode()) { std::string temp_stream; // node opdesc has been checked before @@ -558,9 +559,21 @@ Status ge::GraphPartitioner::Initialize(ge::ComputeGraphPtr compute_graph) { } new_cluster->nodes_.push_back(node); if (!HasNoInput(node)) { + auto node_id = node->GetOpDesc()->GetId(); for (const auto &parent : node->GetInAllNodes()) { - new_cluster->in_clu_.insert(graph_info_.node_2_cluster_.at(parent)->index_); - 
graph_info_.node_2_cluster_.at(parent)->out_clu_.insert(temp_index); + auto parent_id = parent->GetOpDesc()->GetId(); + if (parent_id < node_id) { + auto iter = graph_info_.node_2_cluster_.find(parent); + if (iter == graph_info_.node_2_cluster_.end()) { + GELOGE(FAILED, + "[GraphPartitioner]: node[%s]id[%ld]'s parent_node[%s]id[%ld]" + "should make cluster in advance", + node->GetOpDesc()->GetName().c_str(), node_id, parent->GetOpDesc()->GetName().c_str(), parent_id); + return FAILED; + } + new_cluster->in_clu_.insert(iter->second->index_); + iter->second->out_clu_.insert(temp_index); + } } } graph_info_.node_2_cluster_[node] = new_cluster; @@ -588,7 +601,7 @@ Status ge::GraphPartitioner::AddPartitionsToGraphNode(vectorGetName()); + GE_DUMP(sub_graph, sub_graph->GetName() + "_" + mode_2_str_[graph_info_.mode_]); if (!session_graph_id.empty()) { GE_IF_BOOL_EXEC(!AttrUtils::SetStr(sub_graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id), GELOGW("SetStr ATTR_NAME_SESSION_GRAPH_ID failed");) diff --git a/src/ge/graph/partition/graph_partition.h b/src/ge/graph/partition/graph_partition.h index c4425355..2f93c644 100644 --- a/src/ge/graph/partition/graph_partition.h +++ b/src/ge/graph/partition/graph_partition.h @@ -176,6 +176,8 @@ class GraphPartitioner { Graph2InputNodesSubGraphInfo graph_2_input_subgraph_; GraphPartitionInfo graph_info_; uint32_t partition_times_; // times of call partition + std::map mode_2_str_ = { + {kPartitioning, "Partitioning"}, {kSecondPartitioning, "SecondPartitioning"}, {kMerging, "Merging"}}; friend class GraphManager; }; } // namespace ge diff --git a/src/ge/graph/passes/mark_agnostic_pass.cc b/src/ge/graph/passes/mark_agnostic_pass.cc index 62aa46fd..7f6fd965 100644 --- a/src/ge/graph/passes/mark_agnostic_pass.cc +++ b/src/ge/graph/passes/mark_agnostic_pass.cc @@ -22,7 +22,7 @@ namespace ge { Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { for (const auto &node : graph->GetDirectNode()) { auto node_type = 
NodeUtils::GetNodeType(*node); - if (node_type == SWITCH || node_type == REFSWITCH || node_type == SWITCHN) { + if (node_type == SWITCH || node_type == SWITCHN) { GELOGD("Mark format agnostic and continuous for switch node %s", node->GetName().c_str()); const OpDescPtr op_desc = node->GetOpDesc(); const GeTensorDescPtr op_tensor = op_desc->MutableInputDesc(0); @@ -38,10 +38,15 @@ Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { if (node_type == IDENTITY) { GELOGD("Mark format agnostic for identity node %s", node->GetName().c_str()); AttrUtils::SetInt(node->GetOpDesc(), "_format_agnostic", 1); + continue; + } + if (node_type == REFMERGE || node_type == REFSWITCH) { + GELOGD("Mark format agnostic for regmerge and refswitch node %s", node->GetName().c_str()); + AttrUtils::SetInt(node->GetOpDesc(), "_format_agnostic", 1); AttrUtils::SetListInt(node->GetOpDesc(), "_format_agnostic_except_input", std::vector({1})); continue; } - if (node_type == MERGE || node_type == REFMERGE) { + if (node_type == MERGE) { GELOGD("Mark format agnostic and continuous for merge node %s", node->GetName().c_str()); const OpDescPtr op_desc = node->GetOpDesc(); const GeTensorDescPtr op_tensor = op_desc->MutableOutputDesc(0); diff --git a/src/ge/graph/passes/memcpy_addr_async_pass.cc b/src/ge/graph/passes/memcpy_addr_async_pass.cc index 934f4737..f4bf9c00 100644 --- a/src/ge/graph/passes/memcpy_addr_async_pass.cc +++ b/src/ge/graph/passes/memcpy_addr_async_pass.cc @@ -25,6 +25,18 @@ namespace ge { Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); + if (graph->GetGraphUnknownFlag()) { + GELOGD("Graph[%s] is unknown graph, skip.", graph->GetName().c_str()); + return SUCCESS; + } + + int64_t value = 0; + rtError_t rt_ret = rtGetRtCapability(FEATURE_TYPE_MEMCPY, MEMCPY_INFO_SUPPORT_ZEROCOPY, &value); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtGetRtCapability failed, error=0x%x.", rt_ret); + return RT_FAILED; + } + for (auto &node : 
graph->GetAllNodes()) { auto op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, continue); @@ -193,9 +205,10 @@ NodePtr MemcpyAddrAsyncPass::CreateMemcpyAddrAsyncNode(const ComputeGraphPtr &gr const OutDataAnchorPtr &out_data_anchor, const NodePtr &out_of_user_data) { GELOGD("Start CreateMemcpyAddrAsyncNode."); + static uint32_t new_node_index = 0; OpDescPtr pre_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc(); GE_CHK_BOOL_EXEC(pre_op_desc != nullptr, return nullptr, "Op_desc of pre node is invalid."); - std::string node_name = pre_op_desc->GetName() + "_" + MEMCPYADDRASYNC; + std::string node_name = pre_op_desc->GetName() + "_" + MEMCPYADDRASYNC + "_" + std::to_string(new_node_index++); OpDescPtr op_desc = MakeShared(node_name, MEMCPYADDRASYNC); GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); @@ -210,9 +223,18 @@ NodePtr MemcpyAddrAsyncPass::CreateMemcpyAddrAsyncNode(const ComputeGraphPtr &gr return nullptr; } - int64_t stream_id = out_of_user_data->GetOpDesc()->GetStreamId(); - op_desc->SetStreamId(stream_id); - GELOGI("SetStreamId: Node %s assign stream is %ld.", op_desc->GetName().c_str(), stream_id); + string stream_label; + if (AttrUtils::GetStr(out_of_user_data->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) { + (void)AttrUtils::SetStr(op_desc, ATTR_NAME_STREAM_LABEL, stream_label); + GELOGD("Node %s set stream label: %s", op_desc->GetName().c_str(), stream_label.c_str()); + } + + bool rts_label_node = false; + if (AttrUtils::GetBool(out_of_user_data->GetOpDesc(), ATTR_NAME_RTS_LABEL_NODE, rts_label_node)) { + (void)AttrUtils::SetBool(op_desc, ATTR_NAME_RTS_LABEL_NODE, rts_label_node); + GELOGD("Node %s set rts label node attribute", op_desc->GetName().c_str()); + } + bool labeled_input = false; (void)ge::AttrUtils::GetBool(out_of_user_data->GetOpDesc(), ATTR_NAME_NODE_CONNECT_INPUT, labeled_input); if (labeled_input) { diff --git a/src/ge/graph/passes/merge_pass.cc b/src/ge/graph/passes/merge_pass.cc index 8e691518..dc4451e9 100644 
--- a/src/ge/graph/passes/merge_pass.cc +++ b/src/ge/graph/passes/merge_pass.cc @@ -79,6 +79,13 @@ Status MergePass::Run(NodePtr &node) { return FAILED; } } + auto in_node = in_data_nodes.at(0); + if (IsMergeInputNeedOptimized(in_node)) { + if (IsolateAndDeleteNode(in_node, {0}) != SUCCESS) { + GELOGE(FAILED, "Isolate and delete node %s failed.", in_node->GetName().c_str()); + return FAILED; + } + } return IsolateAndDeleteNode(node, merge_io_map); } default: { @@ -173,4 +180,27 @@ Status MergePass::CreateConstByValue(NodePtr &node, int value_index, OpDescPtr & GE_CHK_STATUS_RET(op_desc->AddOutputDesc(original_out_tensor_desc), "add out put desc failed"); return SUCCESS; } + +bool MergePass::IsMergeInputNeedOptimized(NodePtr &node) const { + if (node == nullptr) { + return false; + } + // node is not inserted by MergeInputMemcpyPass + if ((node->GetType() != MEMCPYASYNC) && (node->GetType() != MEMCPYADDRASYNC)) { + return false; + } + if (node->GetInDataNodes().size() != 1) { + return false; + } + + auto in_node = node->GetInDataNodes().at(0); + if (in_node == nullptr) { + return false; + } + // in_node may be global_step var + if ((in_node->GetType() == VARIABLE) || (in_node->GetType() == VARIABLEV2)) { + return false; + } + return true; +} } // namespace ge diff --git a/src/ge/graph/passes/merge_pass.h b/src/ge/graph/passes/merge_pass.h index ef586713..dd39b591 100644 --- a/src/ge/graph/passes/merge_pass.h +++ b/src/ge/graph/passes/merge_pass.h @@ -28,6 +28,7 @@ class MergePass : public BaseNodePass { bool IsNeedChangeIndexToConstant(NodePtr &node) const; Status ChangeIndexToConstant(NodePtr &node, int &value_index); Status CreateConstByValue(NodePtr &node, int value_index, OpDescPtr &op_desc); + bool IsMergeInputNeedOptimized(NodePtr &node) const; }; } // namespace ge #endif // GE_GRAPH_PASSES_MERGE_PASS_H_ diff --git a/src/ge/graph/passes/net_output_pass.cc b/src/ge/graph/passes/net_output_pass.cc index 8ded625c..b190722f 100644 --- 
a/src/ge/graph/passes/net_output_pass.cc +++ b/src/ge/graph/passes/net_output_pass.cc @@ -103,6 +103,12 @@ Status NetOutputPass::GetOutputNode(const ge::ComputeGraphPtr &graph, std::vecto GELOGI("user set out node [%s] is found in user def targets, out node is prio!", ele.first->GetName().c_str()); targets_.erase(iter); } + + auto op_desc = ele.first->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if (op_desc->HasAttr(ATTR_ATC_USER_DEFINE_OUTPUT_NODES)) { + is_user_define_ouput_nodes = true; + } output_nodes_info.push_back({ele.first, ele.second, -1}); } GELOGI("Output node set by user or leaf node, size:%zu.", output_nodes_info.size()); @@ -414,7 +420,7 @@ Status NetOutputPass::ProcessWithNetoutput(const ge::ComputeGraphPtr &graph, con Status NetOutputPass::AddCtrlEdgesBetweenLeafAndNetOutput(const ge::ComputeGraphPtr &graph, const ge::NodePtr &net_out_node) { GE_CHECK_NOTNULL(net_out_node); - if (!GetLocalOmgContext().user_out_nodes.empty()) { + if (!GetLocalOmgContext().user_out_nodes.empty() || is_user_define_ouput_nodes) { GELOGI("No need to add ctrl edge to netoutput because user out nodes have been set."); return SUCCESS; } diff --git a/src/ge/graph/passes/net_output_pass.h b/src/ge/graph/passes/net_output_pass.h index 567d1246..2ee5847d 100644 --- a/src/ge/graph/passes/net_output_pass.h +++ b/src/ge/graph/passes/net_output_pass.h @@ -220,6 +220,7 @@ class NetOutputPass : public GraphPass { bool is_include_special_node_ = false; std::set targets_; friend class ReUpdateNetOutputPass; + bool is_user_define_ouput_nodes = false; }; } // namespace ge #endif // GE_GRAPH_PASSES_NET_OUTPUT_PASS_H_ diff --git a/src/ge/graph/passes/next_iteration_pass.cc b/src/ge/graph/passes/next_iteration_pass.cc index 73b3b77e..e59281fd 100644 --- a/src/ge/graph/passes/next_iteration_pass.cc +++ b/src/ge/graph/passes/next_iteration_pass.cc @@ -173,14 +173,17 @@ Status NextIterationPass::FindWhileGroups() { NodePtr next_node = nullptr; if (FindTargetNode(out_node, NEXTITERATION, true, 
batch_label, next_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get NextIteration node failed."); + GELOGE(INTERNAL_ERROR, + "Get NextIteration node failed: inputs of Merge should be Enter/NextIteration, current_Merge=%s", + out_node->GetName().c_str()); return INTERNAL_ERROR; } batch_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node)); NodePtr switch_node = nullptr; if (FindTargetNode(out_node, SWITCH, false, batch_label, switch_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get Switch node failed."); + GELOGE(INTERNAL_ERROR, "Get Switch node failed: output of Merge should be Switch, current_Merge=%s", + out_node->GetName().c_str()); return INTERNAL_ERROR; } if (switch_node == nullptr) { @@ -189,7 +192,9 @@ Status NextIterationPass::FindWhileGroups() { NodePtr loop_cond = nullptr; if (FindTargetNode(switch_node, LOOPCOND, true, batch_label, loop_cond) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get LoopCond node failed."); + GELOGE(INTERNAL_ERROR, + "Get LoopCond node failed: pred input of Switch should be LoopCond, current_Switch=%s", + switch_node->GetName().c_str()); return INTERNAL_ERROR; } if (batch_iter.second->loop_cond == nullptr) { diff --git a/src/ge/graph/preprocess/graph_preprocess.cc b/src/ge/graph/preprocess/graph_preprocess.cc index 20216941..16428442 100644 --- a/src/ge/graph/preprocess/graph_preprocess.cc +++ b/src/ge/graph/preprocess/graph_preprocess.cc @@ -217,6 +217,9 @@ NodePtr CreateTransNode(const std::string &name, const std::string &node_type, c auto index = TransOpUtil::GetTransOpDataIndex(node_type); if (index < 0) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19025", {"situation", "reason"}, + {"The trans node type[" + node_type + "]", "it must be " + TransOpUtil::TransopMapToString()}); GELOGE(INTERNAL_ERROR, "The trans node type %s does not exists", node_type.c_str()); return nullptr; } @@ -385,6 +388,8 @@ Status RecoverTransRoadForVar(const NodePtr &var, const VarTransRoad &road) { auto trans_name = 
var->GetName() + "_trans_" + std::to_string(index++); auto ret = RecoverOneTransNodeForVar(trans_name, *iter, last_node, last_node); if (ret != SUCCESS) { + ErrorManager::GetInstance().ATCReportErrMessage("E15001", {"variable", "index", "type"}, + {var->GetName(), std::to_string(index), iter->node_type}); GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s", var->GetName().c_str(), index, iter->node_type.c_str()); return INTERNAL_ERROR; @@ -417,6 +422,8 @@ Status RecoverTransRoadForVarRef(const std::set &nodes, const VarTransR auto trans_name = var->GetName() + "_trans_" + std::to_string(index++); auto ret = RecoverOneTransNodeForVarRef(trans_name, *iter, last_node, last_node); if (ret != SUCCESS) { + ErrorManager::GetInstance().ATCReportErrMessage("E15001", {"variable", "index", "type"}, + {var->GetName(), std::to_string(index), iter->node_type}); GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s", var->GetName().c_str(), index, iter->node_type.c_str()); return INTERNAL_ERROR; @@ -570,6 +577,8 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node std::string related_node_name; if (AttrUtils::GetStr(data_node->GetOpDesc(), kMbatchSwitchnName, related_node_name)) { if (related_node_name.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E15002", {"opname", "value", "reason"}, + {data_node->GetName(), "flag", "but the value is empty"}); GELOGE(INTERNAL_ERROR, "The data node %s has switchn node flag, but the value is empty", data_node->GetName().c_str()); return INTERNAL_ERROR; @@ -581,6 +590,9 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node } } if (switchn_node == nullptr) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E15002", {"opname", "value", "reason"}, + {data_node->GetName(), related_node_name, "but can not find it on the graph"}); GELOGE(INTERNAL_ERROR, "The data node %s has switchn node %s, but can 
not find it on the graph", data_node->GetName().c_str(), related_node_name.c_str()); return INTERNAL_ERROR; @@ -681,6 +693,10 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, No ge::GeShape old_shape = input->GetShape(); bool support = ((old_format == FORMAT_NC1HWC0) || (old_format == FORMAT_NCHW) || (old_format == FORMAT_NHWC)); if (!support) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19014", {"opname", "value", "reason"}, + {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", + "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); GELOGE(INTERNAL_ERROR, "The format [%s] is unsupported", TypeUtils::FormatToSerialString(old_format).c_str()); return FAILED; } @@ -761,6 +777,8 @@ Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tenso op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str(), formats::JoinToString(storage_shape).c_str()); } else { + ErrorManager::GetInstance().ATCReportErrMessage( + "E15003", {"opname", "format"}, {op_desc->GetName(), TypeUtils::FormatToSerialString(storage_format)}); GELOGE(PARAM_INVALID, "Update node by storage format failed, storage_shape not set.
" "node: [%s], storage_format [%s]", @@ -900,9 +918,14 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { // check if is_output_adjust_hw_layout is set if (NeedUpdateFormatByOutputTypeParm(op_desc, index)) { if ((old_format != FORMAT_NCHW) && (old_format != FORMAT_NHWC) && (old_format != FORMAT_NC1HWC0)) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19014", {"opname", "value", "reason"}, + {op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]", + "only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"}); GELOGE(INTERNAL_ERROR, "Format is not one of NCHW, NHWC, NC1HWC0."); return FAILED; } + GeTensorDesc old_desc(old_shape, old_format, old_dtype); if (ProcessNetoutputNodeFp16Nc1hwc0DynShape(old_desc, net_output_input_desc, src_node) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Process netoutput fp16 nc1hwc0."); @@ -1035,6 +1058,9 @@ Status GraphPrepare::CheckRefInputNode(const NodePtr &node, const std::string &i } bool is_acceptable = (acceptable_types.find(input_type) != acceptable_types.end()); if (!is_acceptable) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E15005", {"opname", "optype", "opname1", "optype1"}, + {op_desc->GetName(), node->GetType(), input_op_desc->GetName(), input_op_desc->GetType()}); GELOGE(PARAM_INVALID, "The ref input of ref node %s[%s] must be ref node or variable, but %s[%s]isn't.", node->GetName().c_str(), node->GetType().c_str(), input_op_desc->GetName().c_str(), input_op_desc->GetType().c_str()); @@ -1127,6 +1153,9 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { } if ((index < 0) || (static_cast(index) >= user_input.size())) { + std::string situation = "data op index[" + std::to_string(index) + "]"; + std::string reason = "it must less than user_input size[" + std::to_string(user_input.size()) + "]"; + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); GELOGE(PARAM_INVALID, "user_input size = %zu, graph data op index = 
%ld.", user_input.size(), index); return FAILED; } @@ -1139,6 +1168,11 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { if (need_check_internal_format) { bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); if (is_internal) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19025", {"situation", "reason"}, + {"Input format[" + TypeUtils::FormatToSerialString(format) + "] or origin_format[" + + TypeUtils::FormatToSerialString(origin_format) + "]", + "it is not support"}); GELOGE(PARAM_INVALID, "Input format %s or origin_format %s is not support.", TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::FormatToSerialString(origin_format).c_str()); @@ -1150,6 +1184,9 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { uint32_t length = 1; bool type_ret = TypeUtils::GetDataTypeLength(data_type, length); if (!type_ret) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E19025", {"situation", "reason"}, + {"Input datatype[" + TypeUtils::DataTypeToSerialString(data_type) + "]", "it is not support"}); GELOGE(PARAM_INVALID, "Input datatype %s is not support.", TypeUtils::DataTypeToSerialString(data_type).c_str()); return FAILED; @@ -1164,6 +1201,10 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { return FAILED); bool size_check = (size != 0 && shape_size != size); if (size_check) { + std::string situation = + "input data size[" + std::to_string(size) + "] and shape_size[" + std::to_string(shape_size) + "]"; + std::string reason = "because size != 0 and shape_size != size"; + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); GELOGE(PARAM_INVALID, "input data size =%ld, shape_size =%ld.", size, shape_size); return FAILED; } @@ -1503,6 +1544,9 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) { uint32_t length = 1; bool type_ret = TypeUtils::GetDataTypeLength(data_type, length); if (!type_ret) { +
ErrorManager::GetInstance().ATCReportErrMessage( + "E19025", {"situation", "reason"}, + {"Input datatype[" + TypeUtils::DataTypeToSerialString(data_type) + "]", "it is not support"}); GELOGE(PARAM_INVALID, "Input datatype %s is not support.", TypeUtils::DataTypeToSerialString(data_type).c_str()); return FAILED; } @@ -1512,14 +1556,20 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) { if (shape_size == 0) { if (ge_tensor_desc.GetShape().GetDims().size() == 0) { // shape = [], means it's a sclar tensor. - GE_CHK_BOOL_EXEC(data_size / length == 1, return PARAM_INVALID, "Const is invalid scalar tensor."); + GE_CHK_BOOL_EXEC(data_size / length == 1, ErrorManager::GetInstance().ATCReportErrMessage( + "E10043", {"reason"}, {"Const is invalid scalar tensor."}); + return PARAM_INVALID, "Const is invalid scalar tensor."); } else { // shape = [x, y, 0,...], means it's a vector tensor that value is []. - GE_CHK_BOOL_EXEC(data_size == 0, return PARAM_INVALID, "Const is invalid vector scalar."); + GE_CHK_BOOL_EXEC(data_size == 0, ErrorManager::GetInstance().ATCReportErrMessage( + "E10043", {"reason"}, {"Const is invalid vector scalar."}); + return PARAM_INVALID, "Const is invalid vector scalar."); } } else { - GE_CHK_BOOL_EXEC(data_size == static_cast(shape_size * length) && data_size != 0, return PARAM_INVALID, - "Const input data size is not equal with tensor desc shape"); + GE_CHK_BOOL_EXEC(data_size == static_cast(shape_size * length) && data_size != 0, + ErrorManager::GetInstance().ATCReportErrMessage( + "E10043", {"reason"}, {"Const input data size is not equal with tensor desc shape"}); + return PARAM_INVALID, "Const input data size is not equal with tensor desc shape"); } return SUCCESS; } @@ -1543,6 +1593,9 @@ Status GraphPrepare::CheckUserInput(const std::vector &user_input) { return GE_GRAPH_INIT_FAILED; } if ((index < 0) || (static_cast(index) >= user_input.size())) { + std::string situation = "data op index[" + std::to_string(index) + "]"; + std::string 
reason = "it must less than user_input size[" + std::to_string(user_input.size()) + "]"; + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); GELOGE(GE_GRAPH_INIT_FAILED, "user_input size:%zu, data op index:%ld.", user_input.size(), index); return GE_GRAPH_INIT_FAILED; } @@ -1550,6 +1603,10 @@ Status GraphPrepare::CheckUserInput(const std::vector &user_input) { for (size_t i = 0; i < desc.GetShape().GetDimNum(); ++i) { if (desc.GetShape().GetDim(i) < 0) { + std::string situation = + "data dim[" + std::to_string(i) + "][" + std::to_string(desc.GetShape().GetDim(i)) + "]"; + std::string reason = "it need >= 0"; + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); GELOGE(GE_GRAPH_INIT_FAILED, "data dim %zu is not supported, need >= 0, real:%ld.", i, desc.GetShape().GetDim(i)); return GE_GRAPH_INIT_FAILED; diff --git a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 89c852d5..1aceafdf 100644 --- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -53,16 +53,6 @@ } \ } while (0) -#define AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(expr, _status, errormsg) \ - do { \ - bool b = (expr); \ - if (!b) { \ - GELOGE(_status, errormsg); \ - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \ - return _status; \ - } \ - } while (0) - namespace { const int32_t DEFAULT_MATRIX_R0C0_YUV2RGB = 298; const int32_t DEFAULT_MATRIX_R0C1_YUV2RGB = 0; @@ -317,9 +307,8 @@ NodePtr AippOp::FindDataByIndex(const ComputeGraphPtr &graph, int rank) { } return node; } - GELOGE(PARAM_INVALID, "Can not find the data node by index %d", rank); - string errormsg = "Can not find the data node by aipp parameter related_input_rank " + to_string(rank); - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); + string error_msg = "Can not 
find the data node by aipp parameter related_input_rank " + to_string(rank); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return nullptr; } Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr &target, @@ -364,10 +353,10 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr } if (!edge_indexes.empty() && (*edge_indexes.rbegin() >= data_node->GetOutDataNodes().size())) { - GELOGE(PARAM_INVALID, "input_edge_idx %u should smaller than out edge size of target input %zu", - *edge_indexes.rbegin(), data_node->GetOutDataNodes().size()); - string errormsg = "The aipp parameter input_edge_idx should be smaller than the target input's outnodes."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); + string error_msg = "The aipp parameter input_edge_idx[" + std::to_string(*edge_indexes.rbegin()) + + "] should be smaller than the target input[" + + std::to_string(data_node->GetOutDataNodes().size()) + "]'s outnodes."; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } target = data_node; @@ -442,8 +431,7 @@ Status AippOp::ConvertRelatedInputNameToRank() { string error_msg = "Top name " + related_input_name + "convert rank failed, Please" " ensure top name in aipp config is the top name of data node."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, "Top name[%s] converts rank failed.", related_input_name.c_str()); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } @@ -539,87 +527,87 @@ Status AippOp::SetDefaultParams() { Status AippOp::ValidateParams() { GE_CHECK_NOTNULL(aipp_params_); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->aipp_mode() != domi::AippOpParams::undefined, PARAM_INVALID, - "When insert AIPP op, aipp_mode must be configured as static or dynamic "); - - 
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_0_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_1_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_2_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_2 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_3_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_3 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c2_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c2_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r2c0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r2c1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c2_size() 
<= 1, PARAM_INVALID, - "The parameter matrix_r2c2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_0_size() <= 1, PARAM_INVALID, - "The parameter output_bias_0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_1_size() <= 1, PARAM_INVALID, - "The parameter output_bias_1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_2_size() <= 1, PARAM_INVALID, - "The parameter output_bias_2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_0_size() <= 1, PARAM_INVALID, - "The parameter input_bias_0 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_1_size() <= 1, PARAM_INVALID, - "The parameter input_bias_1 can not be configed repeatedly"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_2_size() <= 1, PARAM_INVALID, - "The parameter input_bias_2 can not be configed repeatedly"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_edge_idx_size() <= 1, PARAM_INVALID, - "The parameter input_edge_idx can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->aipp_mode() != domi::AippOpParams::undefined, PARAM_INVALID, + "When insert AIPP op, aipp_mode must be configured as static or dynamic "); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_0_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_1_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_2_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_2 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_3_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_3 can not be configed repeatedly"); + 
+ GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_0_size() <= 1, PARAM_INVALID, + "The parameter output_bias_0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_1_size() <= 1, PARAM_INVALID, + "The parameter output_bias_1 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_2_size() <= 1, PARAM_INVALID, + "The parameter output_bias_2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_0_size() <= 1, PARAM_INVALID, + "The parameter input_bias_0 can not be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_1_size() <= 1, PARAM_INVALID, + "The parameter input_bias_1 can not 
be configed repeatedly"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_2_size() <= 1, PARAM_INVALID, + "The parameter input_bias_2 can not be configed repeatedly"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_edge_idx_size() <= 1, PARAM_INVALID, + "The parameter input_edge_idx can not be configed repeatedly"); const domi::AippOpParams::AippMode aipp_mode = aipp_params_->aipp_mode(); if (aipp_mode == domi::AippOpParams::dynamic) { - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + GE_CHK_LOG_AND_ERRORMSG( aipp_params_->max_src_image_size() > 0, PARAM_INVALID, "For dynamic AIPP params, max_src_image_size must be set which number should be greater than 0"); } else { - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID, - "Input format of AIPP conf is undefined"); - - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->src_image_size_w() >= 0, PARAM_INVALID, - "Src_image_size_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->src_image_size_h() >= 0, PARAM_INVALID, - "Src_image_size_h must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->load_start_pos_w() >= 0, PARAM_INVALID, - "Load_start_pos_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->load_start_pos_h() >= 0, PARAM_INVALID, - "Load_start_pos_h must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->crop_size_w() >= 0, PARAM_INVALID, - "Crop_size_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->resize_output_w() >= 0, PARAM_INVALID, - "Resize_output_w must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->resize_output_h() >= 0, PARAM_INVALID, - "Resize_output_h must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->left_padding_size() >= 0, 
PARAM_INVALID, - "Left_padding_size must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->right_padding_size() >= 0, PARAM_INVALID, - "Right_padding_size must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->top_padding_size() >= 0, PARAM_INVALID, - "Top_padding_size must not be configed smaller than 0"); - AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->bottom_padding_size() >= 0, PARAM_INVALID, - "Bottom_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID, + "Input format of AIPP conf is undefined"); + + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->src_image_size_w() >= 0, PARAM_INVALID, + "Src_image_size_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->src_image_size_h() >= 0, PARAM_INVALID, + "Src_image_size_h must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->load_start_pos_w() >= 0, PARAM_INVALID, + "Load_start_pos_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->load_start_pos_h() >= 0, PARAM_INVALID, + "Load_start_pos_h must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->crop_size_w() >= 0, PARAM_INVALID, + "Crop_size_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->resize_output_w() >= 0, PARAM_INVALID, + "Resize_output_w must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->resize_output_h() >= 0, PARAM_INVALID, + "Resize_output_h must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->left_padding_size() >= 0, PARAM_INVALID, + "Left_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->right_padding_size() >= 0, PARAM_INVALID, + "Right_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->top_padding_size() 
>= 0, PARAM_INVALID, + "Top_padding_size must not be configed smaller than 0"); + GE_CHK_LOG_AND_ERRORMSG(aipp_params_->bottom_padding_size() >= 0, PARAM_INVALID, + "Bottom_padding_size must not be configed smaller than 0"); } return SUCCESS; @@ -792,17 +780,20 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { int64_t batch_count = -1; if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { - GELOGE(PARAM_INVALID, "Get data_node dims and transfer to nchw_dims failed!"); + string error_msg = "Get data_node dims and transfer to nchw_dims failed!"; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } if (batch_count <= 0) { - GELOGE(PARAM_INVALID, "Batch count %ld is invalid", batch_count); + string error_msg = "Batch count[" + std::to_string(batch_count) + "] is invalid, it must positive."; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } int64_t max_dynamic_aipp_size = CalcMaxSize(batch_count); if (max_dynamic_aipp_size < 0) { - GELOGE(PARAM_INVALID, "The dynamic aipp size is not positive."); + string error_msg = "The dynamic aipp size is not positive"; + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index ba0a3e55..3e1a612d 100644 --- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -40,8 +40,6 @@ using domi::AippOpParams; namespace ge { namespace { const char *const kMbatchSwitchnName = "mbatch-switch-name"; -const int64_t kFormatAgnosticSwitch = 1; -const int64_t kFormatDependInputIndex = 1; } // namespace static void ConvertShape2Nhwc(Format &format, vector &shape_vec) { if ((format == FORMAT_NHWC) || (shape_vec.size() != static_cast(NORMAL_TENSOR_SIZE))) { @@ -127,20 +125,14 @@ Status 
InsertNewOpUtil::CheckInputNamePositionNotRepeat() { string error_msg = "Can not both set related_input_name and related_input_rank!" " Please ensure param is the same with the first aipp config(related_input_name)."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not both set related_input_rank and related_input_name!" - " Please ensure param is the same with the first aipp config(related_input_name)."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } if (item->related_input_name() == another_item->related_input_name()) { string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name" " param is different in different aipp config."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same postion! Please ensure related_input_rank param " - "is different in different aipp config."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } } @@ -161,20 +153,14 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() { string error_msg = "Can not both set related_input_rank and related_input_name!" " Please ensure param is the same with the first aipp config(related_input_rank)."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not both set related_input_rank and related_input_name!" - " Please ensure param is the same with the first aipp config(related_input_rank)."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } if (item->related_input_rank() == another_item->related_input_rank()) { string error_msg = "Can not insert aipp to the same postion! 
Please ensure related_input_rank" " param is different in different aipp config."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same postion! Please ensure related_input_rank param " - "is different in different aipp config."); + GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str()); return PARAM_INVALID; } } @@ -229,9 +215,9 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { } } } - GE_CHK_BOOL_RET_STATUS((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), PARAM_INVALID, - "Can not config part of outputs of Data node to support AIPP, config all " - "of the outputs of Data to support AIPP, or config none of them"); + GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), PARAM_INVALID, + "Can not config part of outputs of Data node to support AIPP, config all " + "of the outputs of Data to support AIPP, or config none of them"); std::unique_ptr aippParams(new (std::nothrow) domi::AippOpParams()); GE_CHECK_NOTNULL(aippParams); @@ -243,15 +229,16 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); if (aippMode == domi::AippOpParams::static_) { - GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, - "The input_format of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID, - "The src_image_size_w of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID, - "The src_image_size_h of all aipp_ops after one Data should be the same"); + GE_CHK_LOG_AND_ERRORMSG(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, + "The input_format of all aipp_ops after one 
Data should be the same"); + GE_CHK_LOG_AND_ERRORMSG(aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID, + "The src_image_size_w of all aipp_ops after one Data should be the same"); + GE_CHK_LOG_AND_ERRORMSG(aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID, + "The src_image_size_h of all aipp_ops after one Data should be the same"); } else { - GE_CHK_BOOL_RET_STATUS(aippParams->max_src_image_size() == currAippParam->max_src_image_size(), PARAM_INVALID, - "The max_src_image_size of all aipp_ops after one Data should be the same"); + GE_CHK_LOG_AND_ERRORMSG(aippParams->max_src_image_size() == currAippParam->max_src_image_size(), + PARAM_INVALID, + "The max_src_image_size of all aipp_ops after one Data should be the same"); } }); } @@ -271,23 +258,6 @@ Status InsertNewOpUtil::GetAippParams(const std::unique_ptr return SUCCESS; } -Status InsertNewOpUtil::AddFormatAgnosticAttrToSwitchn(const NodePtr &aipp_node) { - GE_CHECK_NOTNULL(aipp_node); - auto next_nodes = aipp_node->GetOutDataNodes(); - for (const auto next_node : next_nodes) { - GE_CHECK_NOTNULL(next_node); - auto op_desc = next_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - if (op_desc->GetType() == SWITCHN) { - GELOGI("Find switchn node [%s] after aipp [%s]", op_desc->GetName().c_str(), aipp_node->GetName().c_str()); - (void)AttrUtils::SetInt(op_desc, "_format_agnostic", kFormatAgnosticSwitch); - (void)AttrUtils::SetListInt(op_desc, "_format_agnostic_except_input", - std::vector({kFormatDependInputIndex})); - } - } - return SUCCESS; -} - Status InsertNewOpUtil::UpdateDataNodeByAipp(const ComputeGraphPtr &graph) { std::map switchn_names_to_data; std::set updated_switchn; @@ -302,9 +272,6 @@ Status InsertNewOpUtil::UpdateDataNodeByAipp(const ComputeGraphPtr &graph) { } if (node->GetType() == AIPP) { GE_RETURN_IF_ERROR(UpdatePrevNodeByAipp(node, updated_switchn)); - // In dynamic batch/HW and dynamic aipp scend, switchn should be set format 
agnostic, otherwise transdata maybe - // inserted between aipp and switchn which introduce performance and memory increase problem. - GE_RETURN_IF_ERROR(AddFormatAgnosticAttrToSwitchn(node)); } if (node->GetType() == CASE && node->GetOpDesc()->HasAttr(ATTR_NAME_BATCH_NUM)) { multbatch_case = node; @@ -314,7 +281,8 @@ Status InsertNewOpUtil::UpdateDataNodeByAipp(const ComputeGraphPtr &graph) { for (auto &switchn : updated_switchn) { auto data_iter = switchn_names_to_data.find(switchn->GetName()); if (data_iter == switchn_names_to_data.end()) { - GELOGE(INTERNAL_ERROR, "Failed to find relative data node by switchn %s", switchn->GetName().c_str()); + string error_msg = "Failed to find relative data node by switchn[" + switchn->GetName() + "]"; + GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str()); return INTERNAL_ERROR; } GE_RETURN_IF_ERROR(UpdateDataBySwitchN(switchn, data_iter->second)); @@ -501,7 +469,8 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt } } if (max_index >= switchn->GetOpDesc()->GetOutputsSize()) { - GELOGE(INTERNAL_ERROR, "No max size found from switchn node %s", switchn->GetName().c_str()); + string error_msg = "No max size found from switchn node[" + switchn->GetName() + "]"; + GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str()); return INTERNAL_ERROR; } auto output_desc = switchn->GetOpDesc()->MutableOutputDesc(max_index); diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h index 1d166096..4257e483 100644 --- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h +++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h @@ -68,7 +68,6 @@ class InsertNewOpUtil { void UpdateMultiBatchInputDims(const OpDescPtr &data_opdesc, Format &old_format); Status UpdatePrevNodeByAipp(NodePtr &node, std::set &switchns); Status UpdateDataBySwitchN(const NodePtr &switchn, const NodePtr &data); - Status 
AddFormatAgnosticAttrToSwitchn(const NodePtr &aipp_node); Status GetDataRelatedNode(NodePtr &node, std::map> &data_next_node_map); Status GetAllAipps(const NodePtr &data_node, const NodePtr &node, std::vector &aipps); Status GetInputOutputInfo(NodePtr &data_node, NodePtr &aipp_node, std::string &input, std::string &output); diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.cc b/src/ge/graph/preprocess/multi_batch_copy_graph.cc index b22e4566..2d852053 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -593,6 +593,8 @@ Status MultiBatchGraphCopyer::CheckCopyResult(const std::vector &start_ } auto dims = NodeUtils::GetOutputDesc(*node, kDataOutIndex).GetShape().GetDims(); if (!IsAllDimsPositive(dims)) { + ErrorManager::GetInstance().ATCReportErrMessage("E15004", {"opname", "shape"}, + {node->GetName(), formats::ShapeToString(dims)}); GELOGE(INTERNAL_ERROR, "Failed to copy multi batch graph, the node %s still has unknown shape %s", node->GetName().c_str(), formats::ShapeToString(dims).c_str()); return INTERNAL_ERROR; @@ -1023,6 +1025,13 @@ Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { } Status ProcessMultiBatch(ComputeGraphPtr &graph) { + const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE"); + if (multi_batch_with_case != nullptr) { + PassManager pass_manager; + GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); + return pass_manager.Run(graph); + } + std::vector> shapes; if (!InitDynamicParams(shapes)) { GELOGD("There is no multi-batch options, no need to process multi-batch copy"); diff --git a/src/ge/graph/preprocess/multi_batch_options.cc b/src/ge/graph/preprocess/multi_batch_options.cc index e22af692..4ebae20c 100644 --- a/src/ge/graph/preprocess/multi_batch_options.cc +++ b/src/ge/graph/preprocess/multi_batch_options.cc @@ -124,6 +124,8 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, 
auto tmp_index = cur_data_index; for (size_t i = 0; i < static_cast(dynamic_dims_num); ++i) { if (tmp_index >= dynamic_gear_info.size()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10045", {"name", "shape"}, + {data_name, formats::JoinToString(data_shape)}); GELOGE(PARAM_INVALID, "Data: %s shape: %s make dynamic dims overflow", data_name.c_str(), formats::JoinToString(data_shape).c_str()); return FAILED; @@ -131,6 +133,8 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, one_gear.push_back(dynamic_gear_info[tmp_index++]); } } else { + ErrorManager::GetInstance().ATCReportErrMessage("E10046", {"name", "shape"}, + {data_name, formats::JoinToString(data_shape)}); GELOGE(PARAM_INVALID, "Dynamic dims num of data: %s shape: %s can not be more than one gear dynamic info size", data_name.c_str(), formats::JoinToString(data_shape).c_str()); return FAILED; diff --git a/src/ge/host_cpu_engine/module.mk b/src/ge/host_cpu_engine/module.mk index 6c4932a3..e6cfce80 100644 --- a/src/ge/host_cpu_engine/module.mk +++ b/src/ge/host_cpu_engine/module.mk @@ -9,12 +9,15 @@ local_lib_src_files := engine/host_cpu_engine.cc \ local_lib_inc_path := proto/task.proto \ ${LOCAL_PATH} \ ${TOPDIR}inc \ + ${TOPDIR}metadef/inc \ + ${TOPDIR}graphengine/inc \ ${TOPDIR}inc/external \ - ${TOPDIR}inc/external/graph \ + ${TOPDIR}metadef/inc/external \ + ${TOPDIR}graphengine/inc/external \ + ${TOPDIR}metadef/inc/external/graph \ $(TOPDIR)libc_sec/include \ ${TOPDIR}third_party/protobuf/include \ - ${TOPDIR}inc/framework \ - $(TOPDIR)framework/domi \ + ${TOPDIR}graphengine/inc/framework \ $(TOPDIR)graphengine/ge \ #compiler for host diff --git a/src/ge/host_kernels/slice_kernel.cc b/src/ge/host_kernels/slice_kernel.cc index 5f72fc49..0314e4e5 100644 --- a/src/ge/host_kernels/slice_kernel.cc +++ b/src/ge/host_kernels/slice_kernel.cc @@ -100,7 +100,9 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetOutputDesc(0); + GeTensorDesc 
output_tensor_desc(attr_output_tensor_desc); + output_tensor_desc.SetShape(output_shape); GeTensorPtr output_ptr = MakeShared(output_tensor_desc); if (output_ptr == nullptr) { GELOGW("make_shared ge::GeTensor failed, node name %s.", attr->GetName().c_str()); diff --git a/src/ge/hybrid/common/npu_memory_allocator.cc b/src/ge/hybrid/common/npu_memory_allocator.cc index da365bf7..012a9941 100644 --- a/src/ge/hybrid/common/npu_memory_allocator.cc +++ b/src/ge/hybrid/common/npu_memory_allocator.cc @@ -45,16 +45,9 @@ NpuMemoryAllocator *NpuMemoryAllocator::GetAllocator() { NpuMemoryAllocator::NpuMemoryAllocator(uint32_t device_id) : device_id_(device_id) {} void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { - void *try_reuse_addr = nullptr; size_t allocate_size = size; MemStorageType mem_type = HBM; if (attr != nullptr) { - try_reuse_addr = attr->try_reuse_addr_; - if (attr->padding_ != 0) { - // padding up to multiple of attr->padding, and add extra attr->padding_ - allocate_size = (size + 2 * attr->padding_ - 1) / attr->padding_ * attr->padding_; - GELOGD("Padding size %ld by %d. final size = %zu.", size, attr->padding_, allocate_size); - } mem_type = attr->mem_type_; } @@ -69,6 +62,17 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { } else if (mem_type == HOST_DDR) { buffer = malloc(allocate_size); } else { + void *try_reuse_addr = nullptr; + int padding = kDefaultPadding; + if (attr != nullptr) { + try_reuse_addr = attr->try_reuse_addr_; + if (attr->padding_ > 0) { + padding = attr->padding_; + } + } + // padding up to multiple of padding, and add extra padding + allocate_size = (size + 2 * padding - 1) / padding * padding; + GELOGD("Padding size %ld by %d. 
final size = %zu.", size, padding, allocate_size); buffer = MemManager::Instance() .CachingInstance(RT_MEMORY_HBM) .Malloc(allocate_size, reinterpret_cast(try_reuse_addr), device_id_); diff --git a/src/ge/hybrid/executor/worker/execution_engine.cc b/src/ge/hybrid/executor/worker/execution_engine.cc index 6ccf311f..363e993f 100644 --- a/src/ge/hybrid/executor/worker/execution_engine.cc +++ b/src/ge/hybrid/executor/worker/execution_engine.cc @@ -105,8 +105,10 @@ Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) { vector host_buffer(static_cast(tensor_size)); GELOGD("[%s] To cache output[%d] to host, size = %zu", node_item.NodeName().c_str(), output_idx, output_tensor->GetSize()); - GE_CHK_RT_RET( - rtMemcpy(host_buffer.data(), tensor_size, output_tensor->GetData(), tensor_size, RT_MEMCPY_DEVICE_TO_HOST)); + if (tensor_size > 0) { + GE_CHK_RT_RET( + rtMemcpy(host_buffer.data(), tensor_size, output_tensor->GetData(), tensor_size, RT_MEMCPY_DEVICE_TO_HOST)); + } tensor.SetData(std::move(host_buffer)); string session_id = std::to_string(context_->GetSessionId()); RuntimeInferenceContext *runtime_infer_ctx = nullptr; @@ -234,7 +236,9 @@ Status NodeDoneCallback::ProfilingReport() { return profiling_ret; } - ProfilingManager::Instance().ReportProfilingData(task_desc_info, compute_graph_info); + auto &profiling_manager = ProfilingManager::Instance(); + profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info, + !profiling_manager.IsAclApiMode()); return SUCCESS; } diff --git a/src/ge/hybrid/model/hybrid_model_builder.cc b/src/ge/hybrid/model/hybrid_model_builder.cc index f22e50f9..9beab52a 100644 --- a/src/ge/hybrid/model/hybrid_model_builder.cc +++ b/src/ge/hybrid/model/hybrid_model_builder.cc @@ -249,7 +249,7 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s } // cond or branch need to be prepared before the execution of IF or CASE - if (node_item.node_type == IF || 
node_item.node_type == CASE) { + if (node_item.node_type == IF || node_item.node_type == STATELESSIF || node_item.node_type == CASE) { const auto &in_anchor = ge_node->GetInDataAnchor(0); GE_CHECK_NOTNULL(in_anchor); const auto &peer_anchor = in_anchor->GetPeerOutAnchor(); @@ -653,6 +653,8 @@ Status HybridModelBuilder::LoadGraph() { } else { GE_CHK_STATUS_RET(IdentifyVariableOutputs(*parent_node_item), "[%s] Failed to identify ref outputs.", parent_node_item->NodeName().c_str()); + GE_CHK_STATUS_RET(IdentifySameInputs(*parent_node_item), "[%s] Failed to identify same outputs.", + parent_node_item->NodeName().c_str()); // if parent is function control op. need add a virtual partitioned call if (parent_node_item->IsControlOp()) { @@ -858,7 +860,7 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr auto parent_node = sub_graph.GetParentNode(); GE_CHECK_NOTNULL(parent_node); auto op_type = parent_node->GetType(); - if (op_type == IF || op_type == CASE || op_type == WHILE) { + if (IsControlOp(op_type)) { GELOGD("Set ge_model for control op subgraph: [%s], task_size = %d", sub_graph.GetName().c_str(), ge_model->GetModelTaskDefPtr()->task_size()); subgraph_models_.emplace(sub_graph.GetName(), ge_model); @@ -1087,6 +1089,43 @@ Status HybridModelBuilder::InitRuntimeParams() { return SUCCESS; } +Status HybridModelBuilder::IdentifySameInputs(NodeItem &node_item) { + GELOGD("Start to parse same inputs on net output: %s", node_item.NodeName().c_str()); + auto subgraph = NodeUtils::GetSubgraph(*node_item.node, kSubgraphIndex); + GE_CHECK_NOTNULL(subgraph); + auto net_output_node = subgraph->FindFirstNodeMatchType(NETOUTPUT); + if (net_output_node == nullptr) { + GELOGD("Subgraph [%s] does not have net output", subgraph->GetName().c_str()); + return SUCCESS; + } + + auto net_output_desc = net_output_node->GetOpDesc(); + GE_CHECK_NOTNULL(net_output_desc); + + std::map connected_inputs; + for (const auto &in_data_anchor : 
net_output_node->GetAllInDataAnchors()) { + auto out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + if (out_data_anchor == nullptr) { + continue; + } + auto src_node = out_data_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(src_node); + auto op_desc = src_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + + std::string input_key = std::to_string(op_desc->GetId()) + "_" + std::to_string(out_data_anchor->GetIdx()); + auto it = connected_inputs.find(input_key); + if (it == connected_inputs.end()) { + connected_inputs.emplace(input_key, in_data_anchor->GetIdx()); + } else { + GELOGD("[%s] output [%d] reuse output [%d] input node = %s, idx = %d.", node_item.NodeName().c_str(), + in_data_anchor->GetIdx(), it->second, src_node->GetName().c_str(), out_data_anchor->GetIdx()); + node_item.reuse_outputs.emplace(in_data_anchor->GetIdx(), it->second); + } + } + return SUCCESS; +} + Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { GELOGD("Start to parse outputs of node: %s", node_item.NodeName().c_str()); auto subgraph = NodeUtils::GetSubgraph(*node_item.node, kSubgraphIndex); diff --git a/src/ge/hybrid/model/hybrid_model_builder.h b/src/ge/hybrid/model/hybrid_model_builder.h index d7f6dcf7..663c87fc 100644 --- a/src/ge/hybrid/model/hybrid_model_builder.h +++ b/src/ge/hybrid/model/hybrid_model_builder.h @@ -57,6 +57,7 @@ class HybridModelBuilder { Status LoadGeModel(ComputeGraph &graph, const GeModelPtr &ge_model); Status LoadTasks(); Status IdentifyVariableOutputs(NodeItem &node_item); + Status IdentifySameInputs(NodeItem &node_item); Status BuildNodeItem(const NodePtr &node, NodeItem &node_item); Status GetOrCreateNodeItem(const NodePtr &node, NodeItem **node_item); Status ParseDependentInputNodes(NodeItem &node_item, const std::vector &dependencies); diff --git a/src/ge/hybrid/model/node_item.cc b/src/ge/hybrid/model/node_item.cc index fa6d28d9..4c55d3ed 100644 --- a/src/ge/hybrid/model/node_item.cc +++ b/src/ge/hybrid/model/node_item.cc @@ -28,6 
+28,7 @@ namespace hybrid { namespace { const char *const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; const char *const kNodeTypeRetVal = "_RetVal"; +std::set kControlOpTypes{IF, STATELESSIF, CASE, WHILE, STATELESSWHILE}; Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgraph) { uint32_t parent_index = 0; @@ -96,6 +97,9 @@ Status ParseFusedSubgraph(NodeItem &node_item) { return SUCCESS; } } // namespace + +bool IsControlOp(const std::string &op_type) { return kControlOpTypes.count(op_type) > 0; } + NodeItem::NodeItem(NodePtr node) : node(std::move(node)) { this->op_desc = this->node->GetOpDesc().get(); this->node_id = this->op_desc->GetId(); @@ -145,10 +149,7 @@ Status NodeItem::Init() { return SUCCESS; } -bool NodeItem::IsControlOp() const { - auto op_type = op_desc->GetType(); - return op_type == IF || op_type == CASE || op_type == WHILE || op_type == FOR; -} +bool NodeItem::IsControlOp() const { return ge::hybrid::IsControlOp(op_desc->GetType()); } std::string NodeItem::DebugString() const { std::stringstream ss; diff --git a/src/ge/hybrid/model/node_item.h b/src/ge/hybrid/model/node_item.h index 53cdeca6..c8b0247d 100644 --- a/src/ge/hybrid/model/node_item.h +++ b/src/ge/hybrid/model/node_item.h @@ -36,6 +36,8 @@ struct FusedSubgraph { ComputeGraphPtr graph; }; +bool IsControlOp(const std::string &op_type); + // for caching static information across execution struct NodeItem { explicit NodeItem(NodePtr node); @@ -79,6 +81,7 @@ struct NodeItem { const NodeExecutor *node_executor = nullptr; std::map ref_outputs; std::map reuse_inputs; + std::map reuse_outputs; std::vector is_input_shape_static; bool is_output_shape_static = true; diff --git a/src/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/src/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 698b3ed2..0d361656 100644 --- a/src/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/src/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ 
-17,8 +17,6 @@ #include "aicore_node_executor.h" #include "cce/taskdown_common.hpp" #include "hybrid/executor/hybrid_execution_context.h" -#include "init/gelib.h" -#include "hybrid/executor/hybrid_execution_context.h" namespace ge { namespace hybrid { @@ -27,19 +25,10 @@ REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICORE, AiCore AiCoreNodeTask::AiCoreNodeTask(std::vector> &&tasks) : tasks_(std::move(tasks)) {} Status AiCoreNodeExecutor::Initialize() { - auto ge_lib = GELib::GetInstance(); - GE_CHECK_NOTNULL(ge_lib); - if (!ge_lib->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge_lib is uninitialized, failed."); - return GE_CLI_GE_NOT_INITIALIZED; + compiler_ = TaskCompilerFactory::GetInstance().GetTaskCompiler(); + if (compiler_ != nullptr) { + GE_CHK_STATUS_RET(compiler_->Initialize(), "Failed to init aicore task compiler."); } - - auto &kernel_manager = ge_lib->OpsKernelManagerObj(); - auto aic_ops_store = kernel_manager.GetOpsKernelInfoStore("AIcoreEngine"); - GE_CHECK_NOTNULL(aic_ops_store); - - compiler_.reset(new (std::nothrow) AiCoreTaskCompiler(aic_ops_store)); - GE_CHECK_NOTNULL(compiler_); return SUCCESS; } @@ -119,6 +108,12 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, const NodePtr & GE_CHECK_NOTNULL(op_desc); GELOGI("AiCoreNodeExecutor(%s) CompileTask Start.", node->GetName().c_str()); + auto ori_node_name = node->GetName(); + if (compiler_ == nullptr) { + GELOGE(FAILED, "[%s] Can not find any valid aicore task compiler.", ori_node_name.c_str()); + return FAILED; + } + AiCoreNodeTaskRegistry ®istry = AiCoreNodeTaskRegistry::GetInstance(); std::string shape_key; GE_CHK_STATUS_RET(GenNodeKey(node, shape_key), "GenNodeKey failed, op name = %s.", node->GetName().c_str()); @@ -132,7 +127,6 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, const NodePtr & } std::vector task_defs; - auto ori_node_name = node->GetName(); op_desc->SetName(ori_node_name + "_" + shape_key); 
GE_CHK_STATUS_RET(compiler_->CompileOp(node, task_defs), "Compile op(%s) failed.", ori_node_name.c_str()); op_desc->SetName(ori_node_name); @@ -155,6 +149,13 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, const NodePtr & Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeTaskExecuteAsync] Start"); + if (IsNoOp(context)) { + GELOGD("[%s] Skipping execution for op with empty outputs", context.GetNodeName()); + auto ret = context.TryExecuteCallback(done_callback); + RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeTaskExecuteAsync] End"); + return ret; + } + auto op_desc = context.GetNodeItem().op_desc; GE_CHECK_NOTNULL(op_desc); GELOGI("[%s] ExecuteAsync Start.", op_desc->GetName().c_str()); @@ -218,5 +219,32 @@ bool AiCoreNodeTask::IsSupportDynamicShape() { return true; } + +bool AiCoreNodeTask::IsNoOp(TaskContext &task_context) { + for (int i = 0; i < task_context.NumOutputs(); ++i) { + const auto &tensor_desc = task_context.MutableOutputDesc(i); + GE_CHECK_NOTNULL(tensor_desc); + const auto &shape = tensor_desc->MutableShape(); + if (shape.IsScalar() || shape.GetShapeSize() > 0) { + return false; + } + } + + return true; +} + +TaskCompilerFactory &TaskCompilerFactory::GetInstance() { + static TaskCompilerFactory instance; + return instance; +} + +void TaskCompilerFactory::Register(CreateFn fn) { compiler_func_ = fn; } + +std::unique_ptr TaskCompilerFactory::GetTaskCompiler() { + auto compiler_instance = std::unique_ptr(compiler_func_()); + return compiler_instance; +} + +CompilerFunctionRegistrar::CompilerFunctionRegistrar(CreateFn fn) { TaskCompilerFactory::GetInstance().Register(fn); } } // namespace hybrid } // namespace ge diff --git a/src/ge/hybrid/node_executor/aicore/aicore_node_executor.h b/src/ge/hybrid/node_executor/aicore/aicore_node_executor.h index 506202fa..173263e3 
100644 --- a/src/ge/hybrid/node_executor/aicore/aicore_node_executor.h +++ b/src/ge/hybrid/node_executor/aicore/aicore_node_executor.h @@ -18,13 +18,21 @@ #define GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ #include "hybrid/node_executor/aicore/aicore_task_builder.h" -#include "hybrid/node_executor/aicore/aicore_task_compiler.h" #include "hybrid/node_executor/node_executor.h" #include #include namespace ge { namespace hybrid { + +class TaskCompiler { + public: + TaskCompiler() = default; + virtual ~TaskCompiler() = default; + virtual Status CompileOp(const NodePtr &node, std::vector &tasks) = 0; + virtual Status Initialize() = 0; +}; + class AiCoreNodeTaskRegistry { public: ~AiCoreNodeTaskRegistry() = default; @@ -54,6 +62,7 @@ class AiCoreNodeTask : public NodeTask { Status ExecuteAsync(TaskContext &context, std::function done_callback) override; private: + static bool IsNoOp(TaskContext &task_context); std::vector> tasks_; }; @@ -65,8 +74,31 @@ class AiCoreNodeExecutor : public NodeExecutor { private: static Status GenNodeKey(const NodePtr &node, std::string &node_key); - std::unique_ptr compiler_; + std::unique_ptr compiler_; +}; + +using CreateFn = TaskCompiler *(*)(); +class TaskCompilerFactory { + public: + static TaskCompilerFactory &GetInstance(); + void Register(CreateFn fn); + std::unique_ptr GetTaskCompiler(); + + private: + CreateFn compiler_func_; +}; + +class CompilerFunctionRegistrar { + public: + CompilerFunctionRegistrar(CreateFn fn); + ~CompilerFunctionRegistrar() = default; }; } // namespace hybrid } // namespace ge + +#define REGISTER_TASK_COMPILER(compiler) \ + static ::ge::hybrid::CompilerFunctionRegistrar register_compiler_function __attribute__((unused)) = \ + ::ge::hybrid::CompilerFunctionRegistrar( \ + []() -> ::ge::hybrid::TaskCompiler * { return new (std::nothrow) compiler(); }) + #endif // GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ diff --git a/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc 
b/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc index e67a36c6..0140f864 100644 --- a/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc +++ b/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc @@ -18,6 +18,7 @@ #include "framework/common/debug/log.h" #include "graph/debug/ge_attr_define.h" #include "opskernel_manager/ops_kernel_builder_manager.h" +#include "init/gelib.h" namespace ge { namespace hybrid { @@ -25,11 +26,22 @@ namespace { uintptr_t kWeightBase = 0x10000000; uintptr_t kMemBase = 0x20000000; uint64_t kFakeSize = 0x10000000UL; +REGISTER_TASK_COMPILER(AiCoreTaskCompiler); } // namespace std::mutex AiCoreTaskCompiler::mu_; -AiCoreTaskCompiler::AiCoreTaskCompiler(OpsKernelInfoStorePtr aic_kernel_store) - : aic_kernel_store_(std::move(aic_kernel_store)) {} +Status AiCoreTaskCompiler::Initialize() { + auto ge_lib = GELib::GetInstance(); + GE_CHECK_NOTNULL(ge_lib); + if (!ge_lib->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge_lib is uninitialized, failed."); + return GE_CLI_GE_NOT_INITIALIZED; + } + auto &kernel_manager = ge_lib->OpsKernelManagerObj(); + aic_kernel_store_ = kernel_manager.GetOpsKernelInfoStore("AIcoreEngine"); + GE_CHECK_NOTNULL(aic_kernel_store_); + return SUCCESS; +} Status AiCoreTaskCompiler::DoCompileOp(const NodePtr &node) const { GE_CHECK_NOTNULL(node); diff --git a/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.h b/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.h index cc3897ca..23ce6334 100644 --- a/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.h +++ b/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.h @@ -19,15 +19,17 @@ #include #include "opskernel_manager/ops_kernel_manager.h" +#include "aicore_node_executor.h" namespace ge { namespace hybrid { -class AiCoreTaskCompiler { +class AiCoreTaskCompiler : public TaskCompiler { public: - explicit AiCoreTaskCompiler(OpsKernelInfoStorePtr aic_kernel_store); + AiCoreTaskCompiler() = default; ~AiCoreTaskCompiler() = 
default; - Status CompileOp(const NodePtr &node, std::vector &tasks); + Status CompileOp(const NodePtr &node, std::vector &tasks) override; + Status Initialize() override; private: Status DoCompileOp(const NodePtr &node) const; diff --git a/src/ge/hybrid/node_executor/controlop/control_op_executor.cc b/src/ge/hybrid/node_executor/controlop/control_op_executor.cc index 2bf7407c..fcbde3ef 100644 --- a/src/ge/hybrid/node_executor/controlop/control_op_executor.cc +++ b/src/ge/hybrid/node_executor/controlop/control_op_executor.cc @@ -371,11 +371,11 @@ Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, const NodePtr & unique_ptr node_task; auto node_type = node->GetType(); - if (node_type == IF) { + if (node_type == IF || node_type == STATELESSIF) { node_task.reset(new (std::nothrow) IfOpNodeTask()); } else if (node_type == CASE) { node_task.reset(new (std::nothrow) CaseOpNodeTask()); - } else if (node_type == WHILE) { + } else if (node_type == WHILE || node_type == STATELESSWHILE) { node_task.reset(new (std::nothrow) WhileOpNodeTask()); } else { GELOGE(PARAM_INVALID, "[%s] Unsupported type: %s", node->GetName().c_str(), node_type.c_str()); diff --git a/src/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/src/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 1edd6135..86ba24fa 100644 --- a/src/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/src/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -189,13 +189,20 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector(reinterpret_cast(tv->MutableData())); - addr_infos.resize(dims.front()); - for (auto idx = 0; idx < dims.front(); ++idx) { + auto row_num = dims.front(); + addr_infos.resize(row_num); + auto device_len = tv->GetSize() / row_num; + if (device_len <= 0 || device_len > data[kVarTableIdxLen]) { + GELOGE(FAILED, "Local embedding length is out of range."); + return FAILED; + } + + for (auto idx = 0; idx < row_num; ++idx) { FMK_INT64_MULCHECK(idx, kVarTableRowCnt); auto 
line_idx = idx * kVarTableRowCnt; addr_infos[idx] = {static_cast(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, - data[line_idx + kVarTableIdxLen]}; - local_addr += data[line_idx + kVarTableIdxLen]; + device_len}; + local_addr += device_len; } return SUCCESS; diff --git a/src/ge/hybrid/node_executor/node_executor.cc b/src/ge/hybrid/node_executor/node_executor.cc index 95f1e17c..e0367000 100644 --- a/src/ge/hybrid/node_executor/node_executor.cc +++ b/src/ge/hybrid/node_executor/node_executor.cc @@ -96,7 +96,7 @@ NodeExecutorManager::ExecutorType NodeExecutorManager::ResolveExecutorType(Node return ExecutorType::GE_LOCAL; } - if (op_type == IF || op_type == CASE || op_type == WHILE) { + if (IsControlOp(op_type)) { return ExecutorType::CONTROL_OP; } diff --git a/src/ge/hybrid/node_executor/task_context.cc b/src/ge/hybrid/node_executor/task_context.cc index 430ec63f..249942eb 100644 --- a/src/ge/hybrid/node_executor/task_context.cc +++ b/src/ge/hybrid/node_executor/task_context.cc @@ -200,14 +200,20 @@ Status TaskContext::AllocateOutput(int index, const GeTensorDesc &tensor_desc, T GE_CHECK_NOTNULL(ref_tensor); outputs_start_[index] = *ref_tensor; } else { - auto reuse_input = node_item_->reuse_inputs.find(index); - if (reuse_input != node_item_->reuse_inputs.end()) { - GELOGD("[%s] Output[%d] is referenced to input[%d]", GetNodeName(), index, reuse_input->second); - outputs_start_[index] = inputs_start_[reuse_input->second]; + auto reuse_output_it = node_item_->reuse_outputs.find(index); + if (reuse_output_it != node_item_->reuse_outputs.end()) { + GELOGD("[%s] reuse output [%d] with output [%d]", GetNodeName(), index, reuse_output_it->second); + outputs_start_[index] = outputs_start_[reuse_output_it->second]; } else { - GE_CHK_STATUS_RET_NOLOG(AllocateTensor(tensor_desc, outputs_start_[index], attr)); - GELOGD("Allocating output successfully. node: %s. 
index = %d, size = %zu", node_item_->NodeName().c_str(), index, - outputs_start_[index].GetSize()); + auto reuse_input = node_item_->reuse_inputs.find(index); + if (reuse_input != node_item_->reuse_inputs.end()) { + GELOGD("[%s] Output[%d] is referenced to input[%d]", GetNodeName(), index, reuse_input->second); + outputs_start_[index] = inputs_start_[reuse_input->second]; + } else { + GE_CHK_STATUS_RET_NOLOG(AllocateTensor(tensor_desc, outputs_start_[index], attr)); + GELOGD("Allocating output successfully. node: %s. index = %d, size = %zu", node_item_->NodeName().c_str(), + index, outputs_start_[index].GetSize()); + } } } diff --git a/src/ge/init/gelib.cc b/src/ge/init/gelib.cc index 838aeb0d..5d1057c8 100644 --- a/src/ge/init/gelib.cc +++ b/src/ge/init/gelib.cc @@ -56,6 +56,7 @@ const int kDefaultDeviceIdForInfer = -1; const uint32_t kAicoreOverflow = (0x1 << 0); const uint32_t kAtomicOverflow = (0x1 << 1); const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); +const char *const kGlobalOptionFpCeilingModeDefault = "2"; } // namespace static std::shared_ptr instancePtr_ = nullptr; @@ -77,6 +78,11 @@ Status GELib::Initialize(const map &options) { return ret; } instancePtr_->SetDefaultPrecisionMode(new_options); + + if (new_options.find("ge.fpCeilingMode") == new_options.end()) { + new_options["ge.fpCeilingMode"] = kGlobalOptionFpCeilingModeDefault; + } + GetMutableGlobalOptions().insert(new_options.begin(), new_options.end()); GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); GE_TIMESTAMP_START(Init); diff --git a/src/ge/ir_build/ge_ir_build.cc b/src/ge/ir_build/ge_ir_build.cc index b526342d..9764ddaf 100644 --- a/src/ge/ir_build/ge_ir_build.cc +++ b/src/ge/ir_build/ge_ir_build.cc @@ -18,6 +18,7 @@ #include #include "common/auth/file_saver.h" +#include "common/ge/tbe_plugin_manager.h" #include "external/register/register_types.h" #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" @@ -36,6 
+37,7 @@ #include "ir_build/atc_ir_common.h" #include "model/ge_model.h" #include "graph/shape_refiner.h" +#include "graph/opsproto_manager.h" using std::string; using namespace std; @@ -106,6 +108,37 @@ static graphStatus CheckGlobalOptions(std::map &global return GRAPH_SUCCESS; } +static void GetOpsProtoPath(string &opsproto_path) { + GELOGI("Start to get ops proto path schedule."); + const char *path_env = std::getenv("ASCEND_OPP_PATH"); + if (path_env != nullptr) { + string path = path_env; + string file_path = RealPath(path.c_str()); + if (file_path.empty()) { + GELOGE(FAILED, "File path %s is invalid.", path.c_str()); + return; + } + opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/"); + GELOGI("Get opsproto so path from env : %s", path.c_str()); + return; + } + string path_base = PluginManager::GetPath(); + GELOGI("path_base is %s", path_base.c_str()); + path_base = path_base.substr(0, path_base.rfind('/')); + path_base = path_base.substr(0, path_base.rfind('/') + 1); + opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); +} + +static void LoadOpsProto() { + string opsproto_path; + GetOpsProtoPath(opsproto_path); + GELOGI("Get opsproto path is %s", opsproto_path.c_str()); + OpsProtoManager *manager = OpsProtoManager::Instance(); + map option_tmp; + option_tmp.emplace(std::pair(string("ge.opsProtoLibPath"), opsproto_path)); + (void)manager->Initialize(option_tmp); +} + graphStatus aclgrphBuildInitialize(std::map global_options) { GELOGD("Enter aclgrphInitialize start!"); // check global options @@ -113,9 +146,12 @@ graphStatus aclgrphBuildInitialize(std::map global_opt GELOGE(GRAPH_PARAM_INVALID, "Check global options falied!"); return GRAPH_PARAM_INVALID; } + // print global option map ge::PrintOptionMap(global_options, "global option"); + LoadOpsProto(); + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { 
GELOGI("aclgrphInitialize start!"); @@ -124,6 +160,8 @@ graphStatus aclgrphBuildInitialize(std::map global_opt GELOGE(ret, "GE initialize failed!"); return GRAPH_FAILED; } + // for functional subgraph assign _parent_index. + TBEPluginManager::Instance().InitPreparation(global_options); } GELOGW("gelib has been initialized!"); return GRAPH_SUCCESS; @@ -131,6 +169,7 @@ graphStatus aclgrphBuildInitialize(std::map global_opt void aclgrphBuildFinalize() { if (ge::GELib::GetInstance() != nullptr && ge::GELib::GetInstance()->InitFlag()) { + (void)TBEPluginManager::Instance().Finalize(); (void)ge::GELib::GetInstance()->Finalize(); return; } @@ -169,6 +208,7 @@ class Impl { bool is_dynamic_input); void SetRtSocVersion(); void UpdateThreadContext(); + void LoadOpsProto(); public: ge::GeGenerator generator_; @@ -438,6 +478,12 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph) { auto compute_graph = GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); + auto root_graph = compute_graph->GetParentGraph(); + if (root_graph != nullptr) { + GELOGE(GRAPH_PARAM_INVALID, "Input param should not be subgraph"); + return GRAPH_PARAM_INVALID; + } + auto ret = compute_graph->InferOriginFormat(); if (ret != GRAPH_SUCCESS) { GELOGE(ret, "Acl InferOriginFormat failed."); diff --git a/src/ge/plugin/engine/module.mk b/src/ge/plugin/engine/module.mk index 170cfc68..0e4a8e7f 100644 --- a/src/ge/plugin/engine/module.mk +++ b/src/ge/plugin/engine/module.mk @@ -11,9 +11,13 @@ COMMON_LOCAL_C_INCLUDES := \ $(LOCAL_PATH)/../ \ $(LOCAL_PATH)/../../ \ $(TOPDIR)inc \ + $(TOPDIR)metadef/inc \ + $(TOPDIR)graphengine/inc \ $(TOPDIR)inc/external \ - $(TOPDIR)inc/framework \ - $(TOPDIR)inc/framework/common \ + $(TOPDIR)metadef/inc/external \ + $(TOPDIR)graphengine/inc/external \ + $(TOPDIR)graphengine/inc/framework \ + $(TOPDIR)graphengine/inc/framework/common \ #compiler for host libengine include $(CLEAR_VARS) @@ -56,4 +60,4 @@ LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) LOCAL_LDFLAGS 
:= -lrt -ldl -include $(BUILD_SHARED_LIBRARY) \ No newline at end of file +include $(BUILD_SHARED_LIBRARY) diff --git a/src/ge/session/inner_session.cc b/src/ge/session/inner_session.cc index cf7f3047..e9d8487a 100644 --- a/src/ge/session/inner_session.cc +++ b/src/ge/session/inner_session.cc @@ -166,6 +166,24 @@ Status InnerSession::AddGraph(uint32_t graph_id, const Graph &graph, return SUCCESS; } +Status InnerSession::AddGraphWithCopy(uint32_t graph_id, const Graph &graph, + const std::map &options) { + std::lock_guard lock(resource_mutex_); + if (!init_flag_) { + GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); + return GE_SESS_INIT_FAILED; + } + UpdateThreadContext(options); + Status ret = graph_manager_.AddGraphWithCopy(graph_id, graph, options, domi::GetContext()); + if (ret != SUCCESS) { + GELOGE(ret, "[InnerSession:%lu] add graph %u failed.", session_id_, graph_id); + return ret; + } + + GELOGI("[InnerSession:%lu] add graph success, graph_id=%u.", session_id_, graph_id); + return SUCCESS; +} + Status InnerSession::RunGraph(uint32_t graph_id, const std::vector &inputs, std::vector &outputs) { GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id); if (mutex_.try_lock()) { diff --git a/src/ge/session/inner_session.h b/src/ge/session/inner_session.h index 9bb12b64..bbbedca5 100644 --- a/src/ge/session/inner_session.h +++ b/src/ge/session/inner_session.h @@ -37,6 +37,8 @@ class InnerSession { Status AddGraph(uint32_t graph_id, const Graph &graph, const std::map &options); + Status AddGraphWithCopy(uint32_t graph_id, const Graph &graph, const std::map &options); + Status RunGraph(uint32_t graph_id, const std::vector &inputs, std::vector &outputs); Status RemoveGraph(uint32_t graph_id); diff --git a/src/ge/session/omg.cc b/src/ge/session/omg.cc index 8fe9bbb5..9c4ee9f0 100644 --- a/src/ge/session/omg.cc +++ b/src/ge/session/omg.cc @@ -485,6 +485,10 @@ Status SetOutputNodeInfo(ge::Graph &graph, 
const std::string &output_type, const GELOGE(domi::FAILED, "Check out node (%s) fail.", user_out_nodes[i].first.c_str()); return domi::FAILED; } + + // add user_define_output_nodes attr. + (void)ge::AttrUtils::SetStr(op_desc, ATTR_ATC_USER_DEFINE_OUTPUT_NODES, "true"); + if (i < output_formats.size()) { if (output_formats[i] == domi::DOMI_TENSOR_NC1HWC0) { GELOGI("The output node [%s] should be set NC1HWC0", user_out_nodes[i].first.c_str()); diff --git a/src/ge/session/session_manager.cc b/src/ge/session/session_manager.cc index 35d97c31..978488ec 100644 --- a/src/ge/session/session_manager.cc +++ b/src/ge/session/session_manager.cc @@ -170,6 +170,36 @@ Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const G return innerSession->AddGraph(graph_id, graph, options); } +Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id, const Graph &graph, + const std::map &options) { + if (!init_flag_) { + GELOGE(GE_SESSION_MANAGER_NOT_INIT); + return GE_SESSION_MANAGER_NOT_INIT; + } + SessionPtr innerSession = nullptr; + { + std::lock_guard lock(mutex_); + std::map::iterator it = session_manager_map_.find(session_id); + if (it == session_manager_map_.end()) { + return GE_SESSION_NOT_EXIST; + } else { + innerSession = it->second; + } + auto compute_graph = GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + std::string session_graph_id = std::to_string(session_id) + "_" + std::to_string(graph_id); + if (!AttrUtils::SetStr(*compute_graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id)) { + GELOGW("Set graph session_graph_id attr failed."); + } else { + GELOGD("Set graph session_graph_id attr to [%s]", session_graph_id.c_str()); + } + for (auto graph : compute_graph->GetAllSubgraphs()) { + AttrUtils::SetStr(*graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); + } + } + return innerSession->AddGraphWithCopy(graph_id, graph, options); +} + Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, 
const std::vector &inputs, std::vector &outputs) { if (!init_flag_) { diff --git a/src/ge/session/session_manager.h b/src/ge/session/session_manager.h index 1efb47d8..b79bacdb 100644 --- a/src/ge/session/session_manager.h +++ b/src/ge/session/session_manager.h @@ -62,7 +62,7 @@ class SessionManager { /// /// @ingroup ge_session - /// @brief add a graph to the session with specific session id + /// @brief add a graph to the session with specific session id and graphOptions /// @param [in] session_id session id /// @param [in] graph_id graph id /// @param [in] graph the graph to add @@ -72,6 +72,18 @@ class SessionManager { Status AddGraph(SessionId session_id, uint32_t graph_id, const Graph &graph, const std::map &options); + /// + /// @ingroup ge_session + /// @brief add a copy graph to the session with specific session id and graphOptions + /// @param [in] session_id session id + /// @param [in] graph_id graph id + /// @param [in] graph the graph to add + /// @param [in] options graph level options + /// @return Status result of function + /// + Status AddGraphWithCopy(SessionId session_id, uint32_t graph_id, const Graph &graph, + const std::map &options); + /// /// @ingroup ge_session /// @brief run a graph of the session with specific session id diff --git a/src/ge/single_op/single_op.cc b/src/ge/single_op/single_op.cc index f59fb7bd..c7d49331 100644 --- a/src/ge/single_op/single_op.cc +++ b/src/ge/single_op/single_op.cc @@ -127,6 +127,7 @@ Status SingleOp::UpdateArgs(const std::vector &inputs, const std::ve size_t io_addr_num = args_.size(); if (task->GetOpTaskType() == OP_TASK_AICPU) { GELOGD("Update aicpu_TF task args"); + task->SetIoAddrsForDump(args_); auto *dst_io_addr = const_cast(reinterpret_cast(task->GetIOAddr())); GE_CHECK_NOTNULL(dst_io_addr); auto rt_ret = rtMemcpyAsync(dst_io_addr, sizeof(uint64_t) * args_.size(), &args_[0], @@ -169,11 +170,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c if (ret != SUCCESS) { 
return ret; } - ret = task->OpenDump(args_, stream_); - if (ret != SUCCESS) { - GELOGE(ret, "Open dump failed"); - return ret; - } } return ret; diff --git a/src/ge/single_op/task/op_task.cc b/src/ge/single_op/task/op_task.cc index 7be65ec2..edd69c07 100644 --- a/src/ge/single_op/task/op_task.cc +++ b/src/ge/single_op/task/op_task.cc @@ -41,24 +41,25 @@ void FreeHbm(void *var) { } } // namespace -Status OpTask::OpenDump(const std::vector &io_addr, rtStream_t stream) { +Status OpTask::OpenDump(rtStream_t stream) { if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) { GELOGI("Dump is open in single op,start to set dump info"); std::vector input_addrs; std::vector output_adds; auto input_size = op_desc_->GetInputsSize(); auto output_size = op_desc_->GetOutputsSize(); - auto all_size = io_addr.size(); + auto all_size = io_addrs_for_dump_.size(); if (input_size + output_size != all_size) { - GELOGE(FAILED, "io_addr size is not equal input and output size"); + GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", all_size, + input_size + output_size); return FAILED; } for (size_t i = 0; i < input_size; i++) { - uint64_t input_addr = static_cast(io_addr[i]); + uint64_t input_addr = io_addrs_for_dump_[i]; input_addrs.emplace_back(input_addr); } for (size_t j = 0; j < output_size; j++) { - uint64_t output_addr = static_cast(io_addr[input_size + j]); + uint64_t output_addr = io_addrs_for_dump_[input_size + j]; output_adds.emplace_back(output_addr); } dump_op_.SetDumpInfo(DumpManager::GetInstance().GetDumpProperties(), op_desc_, input_addrs, output_adds, stream); @@ -126,6 +127,17 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { } GELOGI("[TASK_INFO] %s", this->stub_name_.c_str()); + size_t input_size = op_desc_->GetInputsSize(); + size_t output_size = op_desc_->GetOutputsSize(); + uint64_t *io_addr = reinterpret_cast(args_.get()); + std::vector io_addrs(io_addr, io_addr + input_size + output_size); + 
SetIoAddrsForDump(io_addrs); + auto status = OpenDump(stream); + if (status != SUCCESS) { + GELOGE(status, "Open dump failed in the tbe single op %s", this->stub_name_.c_str()); + return status; + } + return SUCCESS; } @@ -378,6 +390,12 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { } GELOGI("[TASK_INFO] is %s", this->task_info_.c_str()); + auto status = OpenDump(stream); + if (status != SUCCESS) { + GELOGE(status, "Open dump failed in aicpu single op %s", this->op_type_.c_str()); + return status; + } + GELOGD("Done launch kernel successfully. task = %s", this->op_type_.c_str()); return SUCCESS; } @@ -655,6 +673,17 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { } GELOGD("Invoke rtCpuKernelLaunch succeeded"); + size_t input_size = op_desc_->GetInputsSize(); + size_t output_size = op_desc_->GetOutputsSize(); + uint64_t *io_addr = reinterpret_cast(io_addr_); + std::vector io_addrs(io_addr, io_addr + input_size + output_size); + SetIoAddrsForDump(io_addrs); + auto status = OpenDump(stream); + if (status != SUCCESS) { + GELOGE(status, "Open dump failed in the aicpucc single op %s", this->kernel_name_.c_str()); + return status; + } + return SUCCESS; } diff --git a/src/ge/single_op/task/op_task.h b/src/ge/single_op/task/op_task.h index 0c65f8a3..2975a900 100644 --- a/src/ge/single_op/task/op_task.h +++ b/src/ge/single_op/task/op_task.h @@ -56,7 +56,8 @@ class OpTask { const vector &GetWorkspaceSizes() const; void SetWorkspaceSizes(const vector &workspace_sizes); const OpDescPtr &GetOpdesc() const { return op_desc_; } - Status OpenDump(const std::vector &io_addr, rtStream_t stream); + Status OpenDump(rtStream_t stream); + void SetIoAddrsForDump(const vector &io_addrs_for_dump) { io_addrs_for_dump_ = io_addrs_for_dump; } virtual Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, std::vector &output_desc, std::vector &output_buffers, rtStream_t stream) { @@ -70,6 +71,7 @@ class OpTask { DumpProperties dump_properties_; 
DumpOp dump_op_; OpDescPtr op_desc_; + std::vector io_addrs_for_dump_; }; class TbeOpTask : public OpTask { @@ -162,9 +164,11 @@ class AiCpuTask : public AiCpuBaseTask { friend class AiCpuTaskBuilder; void *workspace_addr_ = nullptr; std::string task_info_; + // device addr void *args_ = nullptr; size_t arg_size_ = 0; std::string op_type_; + // device addr void *io_addr_ = nullptr; bool dynamic_flag_ = false; diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index b9415b2e..103c8497 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -30,11 +30,6 @@ extern "C" { #define MMPA_MACINFO_DEFAULT_SIZE 18 #define MMPA_CPUDESC_DEFAULT_SIZE 64 -MMPA_DLL_API extern char *optarg; -MMPA_DLL_API extern int opterr; -MMPA_DLL_API extern int optind; -MMPA_DLL_API extern int optopt; - #pragma section(".CRT$XCU", long, read) #pragma section(".CRT$XPU", long, read) diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 3c7afd95..20b586d6 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -390,7 +390,7 @@ typedef void (*rtErrorCallback)(rtExceptionType); typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); -typedef void (*rtProfilingCallback)(uint32_t devId, bool isOpenDevice); +typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); /** * @ingroup dvrt_base @@ -420,7 +420,7 @@ RTS_API rtError_t rtSetProfDirEx(const char *profDir, const char *address, const * @ingroup profiling_base * @brief init profiler object. 
*/ -RTS_API rtError_t rtProfilerInit(const char *profdir, const char *address, const char *job_ctx); +RTS_API rtError_t rtProfilerInit(const char *profDir, const char *address, const char *jobCtx); /** * @ingroup profiling_base @@ -477,10 +477,12 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback); /** * @ingroup dvrt_base * @brief register callback for deviceid + * @param [in] regName unique register name, can't be null + * @param [in] callback Device state callback function * @param [out] NA * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtSetPoriflingCallback(rtProfilingCallback callback); +RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback); /** * @ingroup dvrt_base @@ -602,7 +604,7 @@ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for input null ptr */ -RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskid, uint32_t *streamid); +RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index dddb1e10..fa844677 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -295,7 +295,7 @@ RTS_API rtError_t rtGetRunMode(rtRunMode *mode); * @return RT_ERROR_NONE for ok * @return RT_ERROR_DRV_ERR for can not get aicpu deploy */ -RTS_API rtError_t rtGetAicpuDeploy(rtAicpuDeployType_t *deplyType); +RTS_API rtError_t rtGetAicpuDeploy(rtAicpuDeployType_t *deployType); /** * @ingroup dvrt_dev @@ -309,7 +309,7 @@ RTS_API rtError_t rtSetSocVersion(const char *version); * @brief get chipType * @return RT_ERROR_NONE for ok */ -rtError_t rtGetSocVersion(char *version, const uint32_t maxLen); +RTS_API rtError_t rtGetSocVersion(char *version, const uint32_t maxLen); /** * @ingroup dvrt_dev diff --git
a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index d24af6fa..f9d2eae2 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -57,6 +57,16 @@ RTS_API rtError_t rtEventCreateWithFlag(rtEvent_t *event, uint32_t flag); */ RTS_API rtError_t rtEventDestroy(rtEvent_t event); +/** + * @ingroup dvrt_event + * @brief get event id + * @param [in] event event whose id is requested + * @param [in|out] eventId event id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetEventID(rtEvent_t event, uint32_t *eventId); + /** * @ingroup dvrt_event * @brief event record @@ -184,7 +194,7 @@ RTS_API rtError_t rtNameNotify(rtNotify_t notify, const char *name); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetNotifyID(rtNotify_t notify, uint32_t *notify_id); +RTS_API rtError_t rtGetNotifyID(rtNotify_t notify, uint32_t *notifyId); /** * @ingroup dvrt_event diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index cc1dc05d..d26ca937 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -473,7 +473,7 @@ RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char * * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -rtError_t rtIpcDestroyMemoryName(const char *name); +RTS_API rtError_t rtIpcDestroyMemoryName(const char *name); /** * @ingroup dvrt_mem diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index 5d49c32a..c96349a0 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -438,7 +438,8 @@ RTS_API rtError_t rtModelGetId(rtModel_t model, uint32_t *modelId); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for
error input */ -rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *addr, uint32_t *streamId, uint32_t *taskId); +RTS_API rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *addr, + uint32_t *streamId, uint32_t *taskId); /* * @ingroup rt_model diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 0d973851..631c8083 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -126,17 +126,17 @@ RTS_API rtError_t rtGetStreamId(rtStream_t stream, int32_t *streamId); * @return RT_ERROR_NONE for complete * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetMaxStreamAndTask(uint32_t streamType, uint32_t *MaxStrCount, uint32_t *MaxTaskCount); +RTS_API rtError_t rtGetMaxStreamAndTask(uint32_t streamType, uint32_t *maxStrCount, uint32_t *maxTaskCount); /** * @ingroup dvrt_stream * @brief Name a stream - * @param [in] stream_ stream to be named + * @param [in] stream stream to be named * @param [in] name identification name * @return RT_ERROR_NONE for complete * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtNameStream(rtStream_t stream_, const char *name); +RTS_API rtError_t rtNameStream(rtStream_t stream, const char *name); /** * @ingroup dvrt_stream @@ -162,18 +162,18 @@ RTS_API rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t val * @param [in] dataType data type of target value * @return RT_ERROR_NONE for complete */ -RTS_API rtError_t rtStreamSwitchEx(void *ptr, rtCondition_t condition, void *value_ptr, rtStream_t true_stream, +RTS_API rtError_t rtStreamSwitchEx(void *ptr, rtCondition_t condition, void *valuePtr, rtStream_t trueStream, rtStream_t stream, rtSwitchDataType_t dataType); /** * @ingroup dvrt_stream * @brief Active a stream - * @param [in] active_stream stream to be activated + * @param [in] activeStream stream to be activated * @param [in] stream input stream 
to init task * @return RT_ERROR_NONE for complete * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream); +RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stream); /** * @brief execute extensible stream case switch task diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index c8715041..835070ab 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MSPROF_ENGINE_PROF_ACL_API_H_ -#define MSPROF_ENGINE_PROF_ACL_API_H_ +#ifndef MSPROFILER_API_PROF_ACL_API_H_ +#define MSPROFILER_API_PROF_ACL_API_H_ #define MSVP_MAX_DEV_NUM 64 #define MSVP_PROF_API __attribute__((visibility("default"))) @@ -78,6 +78,9 @@ enum ProfErrorCode { PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics PROF_ERROR_REPEAT_START, // profiilng has already been started PROF_ERROR_NOT_STARTED, // profiling has not been started + PROF_ERROR_REPEAT_SUBSCRIBE, // same model id has already been subscribed + PROF_ERROR_MODEL_ID_INVALID, // model id does not exist or has not been subscribed + PROF_ERROR_API_CONFLICT, // prof ctrl api mode conflicts with subscribe mode }; /** @@ -107,7 +110,8 @@ enum ProfAicoreMetrics { PROF_AICORE_MEMORY = 3, PROF_AICORE_INTERNAL_MEMORY = 4, PROF_AICORE_STALL = 5, - PROF_AICORE_EVENT = 255 + PROF_AICORE_METRICS_COUNT, + PROF_AICORE_NONE = 0xff, }; /** @@ -130,12 +134,54 @@ struct ProfConfig { MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); /** - * @name ProfStopConfig - * @brief struct of ProfStop + * @name ProfStopProfiling + * @brief stop profiling + * @param profStopCfg [IN] config to stop profiling + * @return ProfErrorCode */ -struct ProfStopConfig { - uint64_t padding; -}; +MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig 
*profStopCfg); + +/** + * @name ProfFinalize + * @brief finalize profiling task + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfFinalize(); + +/** + * @name ProfGetDataTypeConfig + * @brief get dataTypeConfig started with of one device + * @param deviceId [IN] deviceId to get dataTypeConfig + * @param dataTypeConfig [OUT] result get + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); + +namespace Msprofiler { +namespace Api { +/** + * @brief transfer profiling config in acl.json to sample config + * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...} + * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]} + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg); + +/** + * @name ProfInit + * @brief init profiling + * @param profInitCfg [IN] config of init profiling of json format + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg); + +/** + * @name ProfStartProfiling + * @brief start profiling + * @param profStartCfg [IN] config to start profiling + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg); /** * @name ProfStopProfiling @@ -161,4 +207,181 @@ MSVP_PROF_API int32_t ProfFinalize(); */ MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); -#endif // MSPROF_ENGINE_PROF_ACL_API_H_ +/** + * @name WorkMode + * @brief profiling api work mode + */ +enum WorkMode { + WORK_MODE_OFF, // profiling not at work + WORK_MODE_API_CTRL, // profiling work on api ctrl mode, (ProfInit) + WORK_MODE_SUBSCRIBE, // profiling work on subscribe mode +}; + +/** + * @name ProfGetApiWorkMode + * @brief get profiling api work mode + * @return WorkMode + */ +MSVP_PROF_API WorkMode ProfGetApiWorkMode(); + +/** + * @name 
ProfSubscribeConfig + * @brief config of subscribe api + */ +struct ProfSubscribeConfig { + bool timeInfo; // subscribe op time + ProfAicoreMetrics aicoreMetrics; // subscribe ai core metrics + void* fd; // pipe fd +}; + +/** + * @name ProfGetDataTypeConfig + * @brief get DataTypeConfig of subscribe + * @param profSubscribeConfig [IN] config to subscribe data + * @return DataTypeConfig + */ +MSVP_PROF_API uint64_t ProfGetDataTypeConfig(const ProfSubscribeConfig *profSubscribeConfig); + +/** + * @name ProfModelSubscribe + * @brief subscribe data of one model id + * @param modelId [IN] model id to subscribe data + * @param devId [IN] device id of model + * @param profSubscribeConfig [IN] config to subscribe data + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfModelSubscribe(uint32_t modelId, uint32_t devId, + const ProfSubscribeConfig *profSubscribeConfig); + +/** + * @name ProfIsModelSubscribed + * @brief check if a model id is subscribed + * @param modelId [IN] model id to check + * @return true: subscribed, false: not + */ +MSVP_PROF_API bool ProfIsModelSubscribed(uint32_t modelId); + +/** + * @name ProfModelUnSubscribe + * @brief unsubscribe a model id + * @param modelId [IN] model id to unsubscribe + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfModelUnSubscribe(uint32_t modelId); + +/** + * @name ProfGetOpDescSize + * @brief get profiling data struct size + * @param opDescSize [OUT] bytes of profiling subscribe data struct + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfGetOpDescSize(uint32_t *opDescSize); + +/** + * @name ProfGetOpNum + * @brief get how many op data there are in data + * @param data [IN] data read from pipe + * @param len [IN] data length + * @param opNum [OUT] number of op in data + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfGetOpNum(const void *data, uint32_t len, uint32_t *opNum); + +/** + * @name ProfGetModelId + * @brief get model id of specific part of data + * @param data [IN] data read from
pipe + * @param len [IN] data length + * @param index [IN] index of part(op) + * @return model id + */ +MSVP_PROF_API uint32_t ProfGetModelId(const void *data, uint32_t len, uint32_t index); + +/** + * @name ProfGetOpType + * @brief get op type of specific part of data + * @param data [IN] data read from pipe + * @param len [IN] data length + * @param opType [OUT] op type buffer + * @param opTypeLen [IN] buffer size of param opType + * @param index [IN] index of part(op) + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfGetOpType(const void *data, uint32_t len, char *opType, uint32_t opTypeLen, uint32_t index); + +/** + * @name ProfGetOpName + * @brief get op name of specific part of data + * @param data [IN] data read from pipe + * @param len [IN] data length + * @param opName [OUT] op name buffer + * @param opNameLen [IN] buffer size of param opName + * @param index [IN] index of part(op) + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfGetOpName(const void *data, uint32_t len, char *opName, uint32_t opNameLen, uint32_t index); + +/** + * @name ProfGetOpStart + * @brief get op start timestamp of specific part of data + * @param data [IN] data read from pipe + * @param len [IN] data length + * @param index [IN] index of part(op) + * @return op start timestamp (us) + */ +MSVP_PROF_API uint64_t ProfGetOpStart(const void *data, uint32_t len, uint32_t index); + +/** + * @name ProfGetOpEnd + * @brief get op end timestamp of specific part of data + * @param data [IN] data read from pipe + * @param len [IN] data length + * @param index [IN] index of part(op) + * @return op end timestamp (us) + */ +MSVP_PROF_API uint64_t ProfGetOpEnd(const void *data, uint32_t len, uint32_t index); + +/** + * @name ProfGetOpDuration + * @brief get op duration of specific part of data + * @param data [IN] data read from pipe + * @param len [IN] data length + * @param index [IN] index of part(op) + * @return op duration (us) + */ +MSVP_PROF_API uint64_t
ProfGetOpDuration(const void *data, uint32_t len, uint32_t index); + +/** + * @name ProfGetOpExecutionTime + * @brief get op execution time of specific part of data + * @param data [IN] data read from pipe + * @param len [IN] data length + * @param index [IN] index of part(op) + * @return op execution time (us) + */ +MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); + +/** + * @name ProfGetOpCubeOps + * @brief get op cube fops of specific part of data + * @param data [IN] data read from pipe + * @param len [IN] data length + * @param index [IN] index of part(op) + * @return op cube fops + */ +MSVP_PROF_API uint64_t ProfGetOpCubeOps(const void *data, uint32_t len, uint32_t index); + +/** + * @name ProfGetOpVectorOps + * @brief get op vector fops of specific part of data + * @param data [IN] data read from pipe + * @param len [IN] data length + * @param index [IN] index of part(op) + * @return op vector fops + */ +MSVP_PROF_API uint64_t ProfGetOpVectorOps(const void *data, uint32_t len, uint32_t index); + +} // namespace Api +} // namespace Msprofiler + +#endif // MSPROFILER_API_PROF_ACL_API_H_